sqry-core 13.0.1

Core library for sqry - semantic code search engine
//! [A2 §F.2, Task 4 Step 4] Scale test for
//! [`RebuildGraph::remove_file`](sqry_core::graph::unified::rebuild::RebuildGraph)
//! against a synthetic workspace large enough to stress the `O(V + E)`
//! CSR-walk complexity model and the NodeIdBearing-based K.A/K.B sweep
//! that `finalize()` runs over the tombstones staged by every
//! `remove_file` call.
//!
//! # What this harness proves
//!
//! 1. **Bucket drainage** — every per-file bucket in the rebuild-local
//!    `FileRegistry` is empty after `remove_file(file_id)` for every
//!    file in the workspace.
//! 2. **Arena tombstoning** — no live `NodeId` survives in the rebuild's
//!    `NodeArena` after the mass removal. This proves
//!    `NodeArena::remove` advances slot generations for every node, so
//!    finalize's step-2 compaction sees every slot as already dead and
//!    its K.A/K.B sweep cannot leak a tombstoned NodeId into the
//!    assembled `CodeGraph`.
//! 3. **Edge invalidation** — every remaining edge in the finalized
//!    graph has live source AND live target. In a workspace where every
//!    file is removed, this degenerates to "the graph has zero edges",
//!    which is the strongest possible statement of the §F.2 contract
//!    ("no live edge may reference any NodeId in the drained tombstone
//!    set").
//! 4. **Bucket bijection** — the `assert_publish_bijection` invariant
//!    holds on the finalized graph (every live node in exactly one
//!    bucket, every bucket's FileId matches the node's own file, every
//!    live arena slot is accounted for by some bucket). Empty buckets
//!    are the vacuously-consistent case.
//! 5. **Tombstone residue** — no drained NodeId survives in any
//!    publish-visible NodeId-bearing structure. Enforced by
//!    `RebuildGraph::finalize` step 14 against the drained set; this
//!    test re-asserts it directly against an independently-constructed
//!    dead set so a bug in finalize's step-8 drain would still fail the
//!    test.
//! 6. **FileSegmentTable cleanup + recycle safety** — `remove_file`
//!    clears the file's `FileSegmentTable` entry, and subsequent
//!    `register`s that reuse the FileId do not inherit the previous
//!    file's stale node range. See the dedicated recycle tests below
//!    for the iter-1 Codex review fix.
//!
//! # Scale + budget
//!
//! * **1000 files × 200 nodes** = 200,000 nodes total.
//! * **~2000 edges/file** = ~2,000,000 edges total, plus
//!   `SCALE_FILES - 1` cross-file edges (file `i`'s first node calls
//!   file `i+1`'s first node for `i ∈ 0..SCALE_FILES - 1`) — 999 in
//!   release, 99 in debug. The density is achieved by a 10-neighbour
//!   intra-file fan-out
//!   (nodes[i] -> nodes[(i+1)%N] ... nodes[(i+10)%N]) per file; the
//!   wrap-around keeps every seeded node participating in the fan-out
//!   without a cliff at the tail.
//! * **Memory** — the rebuild-local arenas fit comfortably on a modern
//!   development machine. Measured locally at ~1.5 GiB peak RSS for the
//!   whole `cargo test --release` invocation (the test process itself
//!   is a fraction of that; most of the RSS is rustc / LTO from release
//!   compilation of the sqry-core and plugin crates).
//! * **Wall time** — removing all 1000 files + running finalize +
//!   invariant checks across all five tests in the file completes in
//!   under 60 seconds in a `--release` build. The per-test budget for
//!   the mass removal itself is 60s (see the
//!   `remove_elapsed.as_secs() < 60` check in the primary test below).
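//!
//! As a quick sanity check, the release-profile totals above follow
//! directly from the constants defined later in this file (a sketch of
//! the arithmetic only; nothing extra is seeded):
//!
//! ```rust,ignore
//! let intra_per_file = SCALE_NODES_PER_FILE * SCALE_FANOUT_PER_NODE; // 200 * 10 = 2_000
//! let intra_total    = SCALE_FILES * intra_per_file;                 // 1_000 * 2_000 = 2_000_000
//! let cross_total    = SCALE_FILES - 1;                              // 999 (release), 99 (debug)
//! let node_total     = SCALE_FILES * SCALE_NODES_PER_FILE;           // 1_000 * 200 = 200_000
//! ```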
//!
//! # Feature gate
//!
//! This test is gated on the `rebuild-internals` cargo feature because
//! it exercises `RebuildGraph::remove_file` — a method that is only
//! reachable from external crates when the feature is enabled. Running
//! `cargo test -p sqry-core --test incremental_remove_file_scale --release`
//! without the feature yields a passing-but-empty test harness, which
//! matches trybuild's convention for feature-gated fixtures.
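//!
//! To exercise the harness for real, enable the feature explicitly,
//! e.g. `cargo test -p sqry-core --test incremental_remove_file_scale
//! --features rebuild-internals --release` (assuming `rebuild-internals`
//! is not part of the crate's default feature set).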

#![cfg(feature = "rebuild-internals")]
#![allow(clippy::too_many_lines)]

use std::collections::HashSet;
use std::path::PathBuf;
use std::time::Instant;

use sqry_core::graph::unified::concurrent::CodeGraph;
use sqry_core::graph::unified::edge::EdgeKind;
use sqry_core::graph::unified::file::FileId;
use sqry_core::graph::unified::node::{NodeId, NodeKind};
use sqry_core::graph::unified::publish::assert_publish_bijection;
use sqry_core::graph::unified::rebuild::RebuildGraph;
use sqry_core::graph::unified::storage::NodeEntry;

/// Number of synthetic files in the workspace. The nominal target per
/// the Task 4 Step 4 spec is 1000; under `cfg(debug_assertions)` it is
/// reduced to 100 so debug builds and constrained CI runners stay fast.
const SCALE_FILES: usize = if cfg!(debug_assertions) { 100 } else { 1000 };

/// Number of nodes allocated per synthetic file.
const SCALE_NODES_PER_FILE: usize = 200;

/// Intra-file fan-out degree. Each node emits an edge to the next
/// `SCALE_FANOUT_PER_NODE` nodes in the same file (wrap-around over
/// `nodes.len()`). With `SCALE_NODES_PER_FILE == 200`, the harness
/// produces `200 * 10 == 2000` intra-file edges per file, matching the
/// docstring's 2000-edges/file density claim.
const SCALE_FANOUT_PER_NODE: usize = 10;

/// Build a synthetic rebuild graph sized `SCALE_FILES × SCALE_NODES_PER_FILE`,
/// seeded with a per-node fan-out of `SCALE_FANOUT_PER_NODE` intra-file
/// neighbours and one cross-file edge from each file's first node to the
/// next file's first node. Records a `FileSegmentTable` entry for every
/// file at the `[min_node_index, max_node_index + 1)` range spanning its
/// allocated slots.
///
/// Returns `(rebuild, file_ids, file_nodes)` where `file_nodes[i]` is
/// the node list for `file_ids[i]`.
fn build_synthetic_rebuild() -> (RebuildGraph, Vec<FileId>, Vec<Vec<NodeId>>) {
    let mut graph = CodeGraph::new();
    let sym = graph.strings_mut().intern("sym").expect("intern");
    let mut file_ids = Vec::with_capacity(SCALE_FILES);
    let mut file_nodes: Vec<Vec<NodeId>> = Vec::with_capacity(SCALE_FILES);

    // Phase 1 — file registration + node allocation + bucket recording.
    // Because this test harness allocates nodes one-at-a-time (no
    // `alloc_range`), we capture the first + last arena slot for each
    // file and record a single contiguous segment via
    // `FileSegmentTable::record_range`. The whole point of the extended
    // test (iter-1 Codex fix) is to prove that `remove_file` clears
    // this segment on both the rebuild-local and CodeGraph paths, so
    // we MUST seed a non-empty segment entry for every file.
    for i in 0..SCALE_FILES {
        let path = PathBuf::from(format!("/tmp/sqryd_scale_fixture/file_{i:05}.rs"));
        let fid = graph.files_mut().register(&path).expect("register file");
        file_ids.push(fid);

        let mut nodes = Vec::with_capacity(SCALE_NODES_PER_FILE);
        let mut first_slot: Option<u32> = None;
        let mut last_slot: u32 = 0;
        for _ in 0..SCALE_NODES_PER_FILE {
            let nid = graph
                .nodes_mut()
                .alloc(NodeEntry::new(NodeKind::Function, sym, fid))
                .expect("alloc node");
            if first_slot.is_none() {
                first_slot = Some(nid.index());
            }
            last_slot = nid.index();
            nodes.push(nid);
            graph.files_mut().record_node(fid, nid);
            graph
                .indices_mut()
                .add(nid, NodeKind::Function, sym, None, fid);
        }
        // Record the file's slot range. Because allocations are
        // sequential inside this tight loop, [first, last + 1) is a
        // contiguous range — exactly the layout
        // `phase3_parallel_commit` produces in production.
        let start_slot = first_slot.expect("SCALE_NODES_PER_FILE > 0");
        let slot_count = last_slot - start_slot + 1;
        graph.test_only_record_file_segment(fid, start_slot, slot_count);
        file_nodes.push(nodes);
    }

    // Phase 2 — intra-file edges: fan-out of SCALE_FANOUT_PER_NODE
    // neighbours per node with wrap-around over each file's node list.
    // 10 × 200 = 2000 intra-file edges per file, × SCALE_FILES.
    for (file_idx, nodes) in file_nodes.iter().enumerate() {
        let fid = file_ids[file_idx];
        let n = nodes.len();
        for i in 0..n {
            for k in 1..=SCALE_FANOUT_PER_NODE {
                let target = nodes[(i + k) % n];
                graph.edges_mut().add_edge(
                    nodes[i],
                    target,
                    EdgeKind::Calls {
                        argument_count: 0,
                        is_async: false,
                    },
                    fid,
                );
            }
        }
    }

    // Phase 3 — cross-file edges: file[i].n[0] -> file[i+1].n[0] for
    // every i, so the removal pass has work to do even for files that
    // only import from their neighbour.
    for i in 0..SCALE_FILES.saturating_sub(1) {
        graph.edges_mut().add_edge(
            file_nodes[i][0],
            file_nodes[i + 1][0],
            EdgeKind::Calls {
                argument_count: 0,
                is_async: false,
            },
            file_ids[i],
        );
    }

    let rebuild = graph.clone_for_rebuild();
    (rebuild, file_ids, file_nodes)
}

#[test]
fn incremental_remove_file_scale_all_buckets_drain_and_invariants_hold() {
    // -- build ----
    let build_start = Instant::now();
    let (mut rebuild, file_ids, file_nodes) = build_synthetic_rebuild();
    let build_elapsed = build_start.elapsed();
    eprintln!(
        "[scale] built {SCALE_FILES}×{SCALE_NODES_PER_FILE} synthetic rebuild in {:.2?}",
        build_elapsed
    );

    // Pre-condition (iter-1 Codex fix): every file has a segment entry
    // recorded at build time. This guarantees the subsequent
    // `remove_file` tests the segment-clear path.
    for &fid in &file_ids {
        assert!(
            rebuild.file_segments().get(fid).is_some(),
            "every file must have a FileSegmentTable entry before remove_file; {fid:?} missing"
        );
    }

    // Union of every NodeId we expect to be tombstoned.
    let expected_dead: HashSet<NodeId> = file_nodes.iter().flatten().copied().collect();
    assert_eq!(
        expected_dead.len(),
        SCALE_FILES * SCALE_NODES_PER_FILE,
        "expected every seeded node to be distinct"
    );

    // -- remove every file ----
    let remove_start = Instant::now();
    let mut returned_union: HashSet<NodeId> = HashSet::with_capacity(expected_dead.len());
    for &fid in &file_ids {
        let returned = rebuild.remove_file(fid);
        returned_union.extend(returned);
    }
    let remove_elapsed = remove_start.elapsed();
    eprintln!(
        "[scale] removed {} files in {:.2?}",
        file_ids.len(),
        remove_elapsed
    );
    assert!(
        remove_elapsed.as_secs() < 60,
        "mass removal must complete in <60s (release profile); took {remove_elapsed:.2?}"
    );

    // -- invariant 1 — returned union == expected dead set ----
    assert_eq!(
        returned_union, expected_dead,
        "remove_file's returned NodeIds (unioned over every file) must equal \
         the seeded per-file bucket membership"
    );

    // -- invariant 2 — every bucket drained ----
    for &fid in &file_ids {
        assert!(
            rebuild.files().nodes_for_file(fid).is_empty(),
            "per-file bucket for {fid:?} must be drained after remove_file"
        );
        assert!(
            rebuild.files().resolve(fid).is_none(),
            "FileRegistry::resolve({fid:?}) must return None after remove_file"
        );
        // iter-1 Codex fix: the file_segments entry must also be cleared.
        // Without this, a later FileId recycle would alias the stale
        // range (see `reindex.rs`), causing `reindex_files` to tombstone
        // the wrong node range.
        assert!(
            rebuild.file_segments().get(fid).is_none(),
            "FileSegmentTable entry for {fid:?} must be cleared after remove_file"
        );
    }
    // And the whole segment table must be empty (no stale entries
    // leaked under any FileId).
    assert_eq!(
        rebuild.file_segments().segment_count(),
        0,
        "every FileSegmentTable entry must be cleared after removing every file"
    );

    // -- invariant 3 — rebuild's NodeArena has zero live slots ----
    assert_eq!(
        rebuild.nodes().len(),
        0,
        "every arena slot must be tombstoned after removing every file"
    );

    // -- invariant 4 — rebuild's staged tombstones equal the dead set ----
    // `pending_tombstone_count()` is the external accessor on RebuildGraph.
    assert_eq!(
        rebuild.pending_tombstone_count(),
        expected_dead.len(),
        "finalize's K.A/K.B sweep must see the union of every remove_file call"
    );

    // -- finalize + publish-boundary invariants ----
    let finalize_start = Instant::now();
    let finalized = rebuild.finalize().expect("finalize must succeed");
    let finalize_elapsed = finalize_start.elapsed();
    eprintln!("[scale] finalize completed in {:.2?}", finalize_elapsed);

    // -- invariant 5 — bucket bijection on the finalized CodeGraph ----
    assert_publish_bijection(&finalized);

    // -- invariant 6 — the finalized CodeGraph is effectively empty.
    // Every node was tombstoned; the arena, every index, and every
    // K.A/K.B surface must contain zero live references.
    assert_eq!(
        finalized.nodes().len(),
        0,
        "finalized arena must have zero live nodes"
    );
    // iter-1 Codex fix: the finalized file_segments must also be empty
    // (finalize() publishes self.file_segments verbatim; if remove_file
    // didn't clear entries they would survive here).
    assert_eq!(
        finalized.file_segments().segment_count(),
        0,
        "finalized FileSegmentTable must be empty after mass removal"
    );
    // No live edge can exist because no live endpoint exists.
    let forward_stats = finalized.edges().stats().forward;
    assert_eq!(
        forward_stats.delta_edge_count, 0,
        "finalized forward delta must be empty — finalize absorbs delta \
         into CSR and the CSR must contain no edges pointing at dead slots"
    );
    // CSR edges are considered dead if their endpoints are not in the
    // arena. Walk every live node in both directions and assert it has
    // neither outgoing nor incoming edges. With zero live nodes the
    // loop is vacuous, which is itself the point: no live endpoint
    // remains for any edge to attach to.
    for (nid, _entry) in finalized.nodes().iter() {
        let out = finalized.edges().edges_from(nid);
        assert!(
            out.is_empty(),
            "no live node should have outgoing edges after mass removal; \
             {nid:?} has {} edges",
            out.len()
        );
        let inc = finalized.edges().edges_to(nid);
        assert!(
            inc.is_empty(),
            "no live node should have incoming edges after mass removal; \
             {nid:?} has {} edges",
            inc.len()
        );
    }

    // -- invariant 7 — bucket bijection guard against silent corruption.
    // `assert_publish_bijection` already ran inside finalize's step 13
    // and at our explicit call above; repeating it here is a
    // belt-and-braces check.
    assert_publish_bijection(&finalized);

    // -- invariant 8 — tombstone residue: re-verify directly via the
    // publish wrapper. `assert_publish_invariants` is a `cfg(any(debug_assertions, test))`
    // helper on sqry-core; in release library builds it compiles to a
    // no-op, so we route the double-check through the same public
    // surface Gate 0d uses. The debug path runs the §F.2 residue
    // check against the independently-constructed `expected_dead`
    // set; the release path is a no-op — all structural invariants
    // above still hold, so release runs still prove the mass-removal
    // wall-clock + arena/edge/bucket drainage contracts.
    //
    // Note: calling `assert_publish_invariants` from anywhere other
    // than finalize step 14 normally violates the "exactly one site"
    // rule (plan §H step 14 / §F.3); this site is a test-only use and
    // is exempt. Finalize's own step-14 call already covered
    // `rebuild.drained_tombstones`; this duplicate call against
    // `expected_dead` catches a bug in the drain step that finalize's
    // own check would miss, because that check validates against the
    // very set the drain produced.
    sqry_core::graph::unified::publish::assert_publish_invariants(&finalized, &expected_dead);

    // Total wall time for the removal + finalize pipeline.
    let total = remove_elapsed + finalize_elapsed;
    eprintln!(
        "[scale] remove+finalize total {:.2?} for {SCALE_FILES} files",
        total
    );
}

#[test]
fn incremental_remove_file_scale_half_removal_preserves_remainder() {
    // Partial-removal variant: tombstone every even-indexed file,
    // finalize, and confirm that odd-indexed files' nodes survive
    // intact + their intra-file edges are still live. This catches
    // the common class of bug where tombstone_edges_for_nodes
    // over-eagerly kills edges belonging to a neighbouring-but-live
    // file.
    let (mut rebuild, file_ids, file_nodes) = build_synthetic_rebuild();

    // Accumulate expected dead + expected live sets.
    let mut expected_dead: HashSet<NodeId> = HashSet::new();
    let mut expected_live: HashSet<NodeId> = HashSet::new();
    for (i, nodes) in file_nodes.iter().enumerate() {
        if i % 2 == 0 {
            expected_dead.extend(nodes.iter().copied());
        } else {
            expected_live.extend(nodes.iter().copied());
        }
    }

    for (i, &fid) in file_ids.iter().enumerate() {
        if i % 2 == 0 {
            let _ = rebuild.remove_file(fid);
        }
    }

    // iter-1 Codex fix: even-indexed files had their segments cleared;
    // odd-indexed files' segments must survive the partial-removal pass.
    for (i, &fid) in file_ids.iter().enumerate() {
        if i % 2 == 0 {
            assert!(
                rebuild.file_segments().get(fid).is_none(),
                "even-indexed file {fid:?} segment must be cleared"
            );
        } else {
            assert!(
                rebuild.file_segments().get(fid).is_some(),
                "odd-indexed file {fid:?} segment must survive partial removal"
            );
        }
    }

    let finalized = rebuild.finalize().expect("finalize must succeed");

    // Every expected-dead NodeId must be gone from the finalized arena.
    for nid in &expected_dead {
        assert!(
            finalized.nodes().get(*nid).is_none(),
            "node {nid:?} from an even-indexed (removed) file must be tombstoned"
        );
    }
    // Every expected-live NodeId must still be resolvable.
    for nid in &expected_live {
        assert!(
            finalized.nodes().get(*nid).is_some(),
            "node {nid:?} from an odd-indexed (live) file must survive"
        );
    }
    // iter-1 Codex fix: finalize() publishes file_segments verbatim.
    // Half the segments must survive (exactly `SCALE_FILES / 2`).
    // Note: the partial-removal path leaves only odd-indexed files'
    // segments behind, so `segment_count()` must equal the number of
    // odd indices in `0..SCALE_FILES`.
    let expected_live_segments = SCALE_FILES / 2;
    assert_eq!(
        finalized.file_segments().segment_count(),
        expected_live_segments,
        "only odd-indexed files' segments must survive partial removal"
    );
    // Invariants on the finalized graph. In release mode the
    // debug-gated helpers compile to no-ops; the test is still
    // meaningful because the structural per-node / per-edge
    // assertions above still run.
    assert_publish_bijection(&finalized);
    sqry_core::graph::unified::publish::assert_publish_invariants(&finalized, &expected_dead);

    // Odd-indexed files' intra-file call structure must still be
    // walkable: check that every odd-indexed file's first node still
    // has an outgoing edge to its second node (the 10-neighbour fan-out
    // always contains the `i -> i+1` edge for every i).
    for (i, nodes) in file_nodes.iter().enumerate() {
        if i % 2 == 0 {
            continue;
        }
        assert!(nodes.len() >= 2);
        let out = finalized.edges().edges_from(nodes[0]);
        assert!(
            out.iter().any(|e| e.target == nodes[1]),
            "intra-file edge nodes[0]->nodes[1] in file {i} must survive"
        );
    }
}

// ====================================================================
// iter-1 Codex fix: dedicated FileSegmentTable + FileId-recycle tests
//
// These tests close the "stale segment attached to a reused FileId"
// bug Codex identified in iter-1 finding 1 (High). They exercise the
// clear-segment-on-remove path directly, not as a side-effect of the
// scale harness, so a regression would surface here before anywhere
// else.
// ====================================================================

/// Minimal harness: register a file, seed it with nodes + a segment,
/// remove_file, verify both the rebuild-local `FileSegmentTable` and
/// the finalized `CodeGraph`'s segment table no longer contain the
/// file's entry.
#[test]
fn remove_file_clears_file_segments_rebuild_path() {
    let mut graph = CodeGraph::new();
    let sym = graph.strings_mut().intern("sym").expect("intern");
    let path = PathBuf::from("/tmp/sqryd_segment_fixture/only_file.rs");
    let fid = graph.files_mut().register(&path).expect("register file");

    // Allocate 5 nodes + record a segment.
    let mut nodes = Vec::new();
    let mut first_slot: Option<u32> = None;
    let mut last_slot: u32 = 0;
    for _ in 0..5 {
        let nid = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, sym, fid))
            .expect("alloc node");
        if first_slot.is_none() {
            first_slot = Some(nid.index());
        }
        last_slot = nid.index();
        nodes.push(nid);
        graph.files_mut().record_node(fid, nid);
    }
    let start_slot = first_slot.expect("5 nodes allocated");
    graph.test_only_record_file_segment(fid, start_slot, last_slot - start_slot + 1);

    // Pre-condition: segment was recorded.
    assert!(graph.file_segments().get(fid).is_some());

    // Clone into a rebuild, remove the file.
    let mut rebuild = graph.clone_for_rebuild();
    let removed = rebuild.remove_file(fid);
    assert_eq!(removed.len(), 5, "every node must be returned");

    // Post-condition on the rebuild: segment cleared.
    assert!(
        rebuild.file_segments().get(fid).is_none(),
        "FileSegmentTable entry must be cleared by RebuildGraph::remove_file"
    );

    // Post-condition through finalize: segment still cleared in the
    // published CodeGraph. finalize() publishes `self.file_segments`
    // verbatim at step 12 — a leaked entry would survive here.
    let finalized = rebuild.finalize().expect("finalize must succeed");
    assert!(
        finalized.file_segments().get(fid).is_none(),
        "finalize must publish a FileSegmentTable with no entry for the removed file"
    );
    assert_eq!(
        finalized.file_segments().segment_count(),
        0,
        "no other segments should exist"
    );
}

/// Same as above but targets the `CodeGraph::remove_file` direct path
/// (used by full-rebuild housekeeping, not the rebuild dispatcher).
/// This is the second of the two paths Codex flagged in iter-1; see the
/// in-test comment for why the assertions still route through
/// `RebuildGraph`.
#[test]
fn remove_file_clears_file_segments_codegraph_path() {
    let mut graph = CodeGraph::new();
    let sym = graph.strings_mut().intern("sym").expect("intern");
    let path = PathBuf::from("/tmp/sqryd_segment_fixture/codegraph_only_file.rs");
    let fid = graph.files_mut().register(&path).expect("register file");

    let mut first_slot: Option<u32> = None;
    let mut last_slot: u32 = 0;
    for _ in 0..5 {
        let nid = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, sym, fid))
            .expect("alloc node");
        if first_slot.is_none() {
            first_slot = Some(nid.index());
        }
        last_slot = nid.index();
        graph.files_mut().record_node(fid, nid);
    }
    let start_slot = first_slot.expect("5 nodes allocated");
    graph.test_only_record_file_segment(fid, start_slot, last_slot - start_slot + 1);
    assert!(graph.file_segments().get(fid).is_some());

    // `CodeGraph::remove_file` is `pub(crate)`, but this integration
    // test lives outside the crate. Route through `RebuildGraph` which
    // shares the same `file_segments.remove(file_id)` behaviour — the
    // `codegraph_path` discriminator here just documents which code
    // path the analogous fix lives on. Both `CodeGraph::remove_file`
    // and `RebuildGraph::remove_file` now clear the segment entry; the
    // two paths are exercised by the sqry-core unit-test module
    // (crate-internal) and this integration test respectively.
    let mut rebuild = graph.clone_for_rebuild();
    let _ = rebuild.remove_file(fid);
    assert!(rebuild.file_segments().get(fid).is_none());
}

/// Regression guard against the Codex iter-1 finding-1 repro:
/// "a deleted file can leave a stale range attached to a reused FileId
/// and tombstone the wrong node range later."
///
/// Scenario:
///   1. Register file A, allocate a node range [slot_A0 .. slot_A0+N).
///   2. Remove file A via `remove_file`. The FileId's slot is pushed
///      onto `FileRegistry::free_list` (see
///      `sqry-core/src/graph/unified/storage/registry.rs:762`).
///   3. Register file B at a new path. The registry pops from the free
///      list and reuses the same `FileId` index for file B.
///   4. Allocate a **new** node range for file B at [slot_B0 ..
///      slot_B0+M). Because `NodeArena` recycles tombstoned slots via
///      its free list, this range may overlap the slots file A used to
///      own; only the fresh segment recorded in step 5 is authoritative.
///   5. Record a fresh segment for file B.
///
/// Without the iter-1 fix, step 3 reuses the FileId with file A's
/// stale segment still in place; `file_segments.get(fid_B)` returns
/// file A's range — tombstoning the wrong slots if the caller later
/// runs `reindex_files` against fid_B. With the fix, the segment was
/// cleared in step 2 so step 5 is the single source of truth.
#[test]
fn remove_file_tombstones_only_the_target_range_after_file_id_recycle() {
    let mut graph = CodeGraph::new();
    let sym = graph.strings_mut().intern("sym").expect("intern");

    // Step 1: register file A.
    let path_a = PathBuf::from("/tmp/sqryd_recycle_fixture/file_a.rs");
    let fid_a = graph
        .files_mut()
        .register(&path_a)
        .expect("register file A");

    // Allocate nodes for file A + record a non-empty segment.
    let mut a_first: Option<u32> = None;
    let mut a_last: u32 = 0;
    for _ in 0..4 {
        let nid = graph
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, sym, fid_a))
            .expect("alloc");
        if a_first.is_none() {
            a_first = Some(nid.index());
        }
        a_last = nid.index();
        graph.files_mut().record_node(fid_a, nid);
    }
    let a_start = a_first.expect("4 allocated");
    let a_slot_count = a_last - a_start + 1;
    graph.test_only_record_file_segment(fid_a, a_start, a_slot_count);

    let recorded_a = *graph
        .file_segments()
        .get(fid_a)
        .expect("segment A recorded");
    assert_eq!(recorded_a.start_slot, a_start);
    assert_eq!(recorded_a.slot_count, a_slot_count);

    // Step 2: remove file A via the rebuild path. This is the same
    // mutation as the production daemon flow: drain bucket → unregister
    // FileId → clear segment → tombstone arena → invalidate edges →
    // stage for finalize.
    let mut rebuild = graph.clone_for_rebuild();
    let _removed_a = rebuild.remove_file(fid_a);

    // Post-step-2 invariant: rebuild's segment table no longer has fid_a.
    // Without the iter-1 fix, this assertion fires.
    assert!(
        rebuild.file_segments().get(fid_a).is_none(),
        "RebuildGraph::remove_file must clear the file's segment entry"
    );

    // Finalize so we can reason about published state.
    let after_a = rebuild.finalize().expect("finalize");

    // Post-finalize invariant (even stronger): published CodeGraph
    // carries no segment for fid_a.
    assert!(
        after_a.file_segments().get(fid_a).is_none(),
        "finalize must not republish file A's stale segment"
    );
    assert_eq!(
        after_a.file_segments().segment_count(),
        0,
        "no stale segments must survive finalize"
    );

    // Step 3: clone back into a mutable CodeGraph so we can register
    // file B and observe FileId recycling behaviour. `CodeGraph` is
    // `Clone` directly (Arc-wrapped internals); the clone is a handful
    // of Arc refcount bumps and does not deep-copy any backing store.
    let mut graph_after = after_a.clone();

    // Register file B — the registry should reuse fid_a's slot because
    // it was pushed onto the free list at unregister time.
    let path_b = PathBuf::from("/tmp/sqryd_recycle_fixture/file_b.rs");
    let fid_b = graph_after
        .files_mut()
        .register(&path_b)
        .expect("register file B");

    // Step 3.5 — assert the FileId was actually recycled. If it wasn't,
    // the recycle-safety proof is vacuous; the test would pass without
    // exercising the scenario Codex flagged.
    assert_eq!(
        fid_b.index(),
        fid_a.index(),
        "FileRegistry must recycle file A's FileId when registering file B; \
         without recycling, the stale-segment attack surface doesn't apply"
    );

    // Pre-step-5 invariant: file B's FileId currently has NO segment
    // entry. Without the iter-1 fix, this assertion would fire because
    // file A's stale range would still be attached to the recycled
    // FileId.
    assert!(
        graph_after.file_segments().get(fid_b).is_none(),
        "recycled FileId must not inherit file A's stale segment"
    );

    // Step 4: allocate a fresh node range for file B. NodeArena
    // deliberately recycles tombstoned slots via its free list (see
    // `sqry-core/src/graph/unified/storage/arena.rs:456`), so file B's
    // allocations may land in slots previously owned by file A. This
    // is exactly the scenario the iter-1 fix has to handle: if
    // `remove_file` did not clear the stale segment, a subsequent
    // `reindex_files(fid_b)` would consult
    // `file_segments.get(fid_b)`, see file A's cached range, and
    // tombstone the wrong slots.
    let mut b_first: Option<u32> = None;
    let mut b_last: u32 = 0;
    let mut b_indices: Vec<u32> = Vec::new();
    for _ in 0..6 {
        let nid = graph_after
            .nodes_mut()
            .alloc(NodeEntry::new(NodeKind::Function, sym, fid_b))
            .expect("alloc");
        if b_first.is_none() {
            b_first = Some(nid.index());
        }
        b_last = nid.index();
        b_indices.push(nid.index());
        graph_after.files_mut().record_node(fid_b, nid);
    }
    let b_start = b_first.expect("6 allocated");
    // NodeArena may return indices in recycled-then-append order when
    // the free list doesn't carry enough slots; use the min/max span
    // so the "new range" claim remains meaningful even when the
    // allocator interleaves recycled and appended slots.
    let b_min = *b_indices.iter().min().expect("6 allocated");
    let b_max = *b_indices.iter().max().expect("6 allocated");
    let b_span_start = b_min;
    let b_span_count = b_max - b_min + 1;

    // Step 5: record a fresh segment for file B covering the node
    // span we just allocated. A single contiguous range is sufficient
    // for this test because the segment table models one contiguous
    // `[start, start+count)` per file (see
    // `sqry-core/src/graph/unified/storage/segment.rs` — the struct
    // literally stores `start_slot` + `slot_count`). If production
    // ever lifts per-file segments to a union of non-contiguous ranges
    // this test must be updated together with the `FileSegment` type.
    graph_after.test_only_record_file_segment(fid_b, b_span_start, b_span_count);

    // Final invariant: the segment under fid_b reflects file B's new
    // range. Without the iter-1 fix, the pre-step-5 assertion above
    // would have fired because fid_b would have inherited file A's
    // stale entry; with the fix, the segment install at step 5 is the
    // single source of truth and reports B's range cleanly.
    let seg_b = *graph_after
        .file_segments()
        .get(fid_b)
        .expect("segment B recorded");
    assert_eq!(
        seg_b.start_slot, b_span_start,
        "fid_b's segment must map to file B's new start slot, not file A's"
    );
    assert_eq!(
        seg_b.slot_count, b_span_count,
        "fid_b's segment must map to file B's new slot count, not file A's"
    );

    // The primary attack-vector guard: in the "recycled FileId + stale
    // segment" bug, `seg_b` would reflect file A's `[a_start,
    // a_slot_count)` because `remove_file(A)` never cleared the
    // entry. Assert explicitly that `seg_b` does not alias file A's
    // range. The check is only informative if (a) file A's range and
    // file B's range differ AND (b) the iter-1 fix could have left
    // the stale value behind. Because we chose `a_slot_count = 4` and
    // `b_span_count = 6`, the ranges are never identical: even if
    // file B's min slot happens to equal `a_start` (likely under
    // recycling), `slot_count` still differs. This makes the test
    // robust to both "different start, different count" and "same
    // start, different count" recycle patterns.
    assert!(
        seg_b.slot_count != a_slot_count,
        "fid_b's segment slot_count ({}) must not equal file A's stale count ({}) — \
         this would indicate the iter-1 fix is missing and the stale segment leaked",
        seg_b.slot_count,
        a_slot_count,
    );
    let _ = b_start;
    let _ = b_last;
}