nornir 0.5.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
//! RESOLVED knowledge map — rust-analyzer IN-PROCESS (task #24 relaunch).
//!
//! The [`super::scip`] sibling parses a SCIP index that a *separately invoked*
//! `rust-analyzer scip` subprocess produced. This module honors the relaunch's
//! HARD CONSTRAINT — **no shelling** — by linking rust-analyzer **as a library**:
//!
//! - [`ra_ap_load_cargo::load_workspace_at`] loads the cargo workspace into a
//!   `RootDatabase` (the same salsa DB the IDE runs on),
//! - [`ra_ap_ide::Analysis`] (the IDE query surface) pulls fully-RESOLVED
//!   definitions ([`Analysis::file_structure`]) and references
//!   ([`Analysis::find_all_refs`]) directly from memory — trait-method dispatch,
//!   macro-generated items and type-directed resolution included.
//!
//! No `index.scip` round-trip, no child process. The extracted rows reuse the
//! [`super::scip::ScipRow`]/[`super::scip::ScipScan`] data model and the same
//! `scip_occurrences` warehouse table, so the persistence + query halves are
//! shared with the SCIP-file path. SHA-keyed (historized), like that path.
//!
//! ## Resolved-symbol identity
//!
//! ra-ap's IDE API does not hand us a SCIP moniker string through this surface.
//! Instead we synthesize a **def-site key** — `file#Lline:Ccol` of the
//! definition's navigation range — and key every reference to the def-site that
//! [`Analysis::find_all_refs`] attributed it to. That makes find-usages an exact
//! identity match on the *resolved definition*, not a name match: two unrelated
//! `name`s resolve to two distinct def-site keys, never folded. (A future pass
//! can swap this for a real SCIP moniker via `ra_ap_ide::moniker`.)
//!
//! Gated behind the `ra-ingest` cargo feature; default builds never link `ra_ap_*`.

use std::path::Path;
use std::time::Instant;

use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use uuid::Uuid;

use ra_ap_ide::{Analysis, AnalysisHost, FileId};
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
use ra_ap_project_model::CargoConfig;
use ra_ap_vfs::{Vfs, VfsPath};

use super::scip::{ScipRow, ScipScan};

/// Rayon-free, work-stealing parallel map that **consumes** its input items —
/// each `T` is moved into exactly one worker. The gatling shape for work that
/// carries per-item OWNED, `!Sync` state: here each item is a `(chunk, Analysis)`
/// pair, and rust-analyzer's `Analysis` is `Send + !Sync`, so it can't live behind
/// a shared `Fn(usize)` (what `znippy-zoomies::gatling_forkjoin::gatling_for_each`
/// takes). The items are pre-built on the main thread (where the `!Sync` host
/// lives) and moved in. `std::thread::scope` + a shared work queue — no rayon (the
/// constellation forbids it); results returned in input order.
///
/// Implementation notes:
/// - Entirely safe code: workers pull `(index, item)` pairs from a `Mutex`-guarded
///   iterator. The lock is held only for the `next()` call, never while `f` runs,
///   so contention is one uncontended lock per item.
/// - Each worker keeps its results locally; the caller thread slots them back by
///   index after joining, which restores input order.
/// - Runs sequentially on the calling thread when there is a single item or a
///   single available core.
///
/// # Panics
///
/// If `f` panics in a worker, the remaining workers finish draining the queue
/// and the ORIGINAL panic payload is re-raised on the calling thread. Unclaimed
/// inputs and already-computed outputs are dropped normally (nothing is leaked).
///
/// CANDIDATE to lift into `znippy-zoomies::gatling_forkjoin` as the owned-item
/// sibling of `gatling_for_each` (which only takes `Fn(usize) -> T + Sync`).
fn gatling_map_owned<T, R, F>(items: Vec<T>, f: F) -> Vec<R>
where
    T: Send,
    R: Send,
    F: Fn(T) -> R + Sync,
{
    use std::sync::{Mutex, PoisonError};

    let n = items.len();
    if n == 0 {
        return Vec::new();
    }
    let workers = std::thread::available_parallelism()
        .map(|w| w.get())
        .unwrap_or(1)
        .min(n);
    if workers <= 1 {
        return items.into_iter().map(f).collect();
    }

    // Shared work queue: every `(index, item)` pair is handed to exactly one
    // worker. The index travels with the item so results can be re-ordered.
    let queue = Mutex::new(items.into_iter().enumerate());
    let queue_ref = &queue;
    let f_ref = &f;

    // One output slot per input index, filled in on the calling thread.
    let mut slots: Vec<Option<R>> = std::iter::repeat_with(|| None).take(n).collect();

    std::thread::scope(|s| {
        let handles: Vec<_> = (0..workers)
            .map(|_| {
                s.spawn(move || {
                    let mut done: Vec<(usize, R)> = Vec::new();
                    loop {
                        // The guard is a temporary of this statement, so the lock
                        // is released BEFORE `f` runs. (A `while let` on the locked
                        // iterator would hold it across the whole loop body.)
                        let claimed = queue_ref
                            .lock()
                            .unwrap_or_else(PoisonError::into_inner)
                            .next();
                        let Some((index, item)) = claimed else { break };
                        done.push((index, f_ref(item)));
                    }
                    done
                })
            })
            .collect();

        for handle in handles {
            match handle.join() {
                Ok(done) => {
                    for (index, result) in done {
                        slots[index] = Some(result);
                    }
                }
                // Surface the worker's own panic payload instead of the generic
                // "a scoped thread panicked"; the scope still joins the rest.
                Err(payload) => std::panic::resume_unwind(payload),
            }
        }
    });

    slots
        .into_iter()
        .map(|slot| slot.expect("every input index is claimed and mapped exactly once"))
        .collect()
}

/// A definition discovered in one file, with the position to query refs at.
///
/// Built by the legacy per-def path in `ingest_in_process` from one
/// `file_structure` node, then handed to `def_and_refs`, which emits the
/// definition row and runs `find_all_refs` at (`file_id`, `offset`).
struct DefSite {
    /// File holding the definition; the file half of the `find_all_refs` query
    /// position.
    file_id: FileId,
    /// Byte offset of the definition's *name* (navigation range start).
    offset: ra_ap_ide::TextSize,
    /// The structure node's label; becomes the row's `display_name`.
    label: String,
    /// `Debug` rendering of the structure node's kind.
    kind: String,
    /// Workspace-relative file path of the definition.
    file: String,
    /// 1-based line of the definition's name; with `file` and `col` it forms the
    /// `file#Lline:Ccol` def-site key.
    line: u32,
    /// 1-based column of the definition's name.
    col: u32,
    /// 1-based START/END line of the def's FULL node range (body span) — the
    /// enclosing range used by `scip_call_edges` for resolved caller attribution.
    enc_start_line: u32,
    enc_end_line: u32,
}

/// Run rust-analyzer IN-PROCESS over the cargo workspace at `root`, extract the
/// RESOLVED defs + references, and return a [`ScipScan`] ready to persist.
///
/// `repo`/`git_sha` tag the snapshot (the SHA-key for historization) and are
/// copied into the returned scan together with `snapshot_id` and `ts`.
///
/// By default this delegates to [`ingest_in_process_single_pass`]. Setting the
/// environment variable `NORNIR_SCIP_SINGLEPASS=0` selects the legacy path
/// implemented here: load the workspace once, collect every def-site via
/// `file_structure`, then fan `find_all_refs` out over the def-sites in parallel
/// with the rayon-free `gatling_map_owned`.
///
/// # Errors
///
/// Fails when the cargo workspace at `root` cannot be loaded into rust-analyzer.
pub fn ingest_in_process(
    root: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    // SELECTION: single-pass unless the variable is set to exactly "0". An unset
    // or unreadable variable also means single-pass.
    let legacy_requested = matches!(
        std::env::var("NORNIR_SCIP_SINGLEPASS").as_deref(),
        Ok("0")
    );
    if !legacy_requested {
        return ingest_in_process_single_pass(root, repo, git_sha, snapshot_id, ts);
    }

    // The workspace load runs BEFORE the parallel ref pass, so handing ra every
    // core for the load cannot oversubscribe our own fan-out.
    let load_config = LoadCargoConfig {
        load_out_dirs_from_check: true,
        with_proc_macro_server: ProcMacroServerChoice::Sysroot,
        prefill_caches: true,
        num_worker_threads: std::thread::available_parallelism().map_or(8, |n| n.get()),
        proc_macro_processes: 0,
    };
    let cargo_config = CargoConfig::default();

    // In-process equivalent of what `rust-analyzer scip` does before it emits:
    // build the RootDatabase + Vfs, no subprocess involved.
    let (db, vfs, _proc_macro) =
        load_workspace_at(root, &cargo_config, &load_config, &|_progress| {}).with_context(
            || format!("loading cargo workspace at {} into rust-analyzer", root.display()),
        )?;

    let host = AnalysisHost::with_database(db);
    let analysis = host.analysis();

    // Keep only `.rs` files that sit under `root`; everything else the vfs holds
    // (dependencies, sysroot, virtual files) is out of scope.
    let root_str = root.to_string_lossy().replace('\\', "/");
    let mut files: Vec<(FileId, String)> = Vec::new();
    for (file_id, path) in vfs.iter() {
        match vfs_rel_path(path, &root_str) {
            Some(rel) if rel.ends_with(".rs") => files.push((file_id, rel)),
            _ => {}
        }
    }

    // 1. DEFINITIONS — one `DefSite` per `file_structure` node. Sequential; files
    //    whose structure or line index cannot be computed are skipped.
    let struct_cfg = ra_ap_ide::FileStructureConfig { exclude_locals: true };
    let mut defs: Vec<DefSite> = Vec::new();
    for (file_id, rel) in &files {
        let nodes = match analysis.file_structure(&struct_cfg, *file_id) {
            Ok(nodes) => nodes,
            Err(_) => continue,
        };
        let line_index = match analysis.file_line_index(*file_id) {
            Ok(index) => index,
            Err(_) => continue,
        };
        defs.extend(nodes.into_iter().map(|node| {
            let name_off = node.navigation_range.start();
            let name_at = line_index.line_col(name_off);
            // Full node span → enclosing range, stored as 1-based lines.
            let span_first = line_index.line_col(node.node_range.start());
            let span_last = line_index.line_col(node.node_range.end());
            let kind = format!("{:?}", node.kind);
            DefSite {
                file_id: *file_id,
                offset: name_off,
                label: node.label,
                kind,
                file: rel.clone(),
                line: name_at.line + 1,
                col: name_at.col + 1,
                enc_start_line: span_first.line + 1,
                enc_end_line: span_last.line + 1,
            }
        }));
    }

    // 2. REFERENCES — `find_all_refs` per def-site, in parallel.
    //
    //    `Analysis` is `Send + !Sync`, so one snapshot cannot be shared between
    //    workers. Instead each chunk gets its OWN snapshot, minted here on the
    //    main thread and moved into whichever worker claims the chunk.
    //
    //    Per-def cost is heavily skewed (a hot `pub` symbol triggers a huge
    //    search, a private fn almost none), so we cut 8 chunks per core and let
    //    the work-stealing map spread the expensive defs around.
    let workers = std::thread::available_parallelism()
        .map(|w| w.get())
        .unwrap_or(1)
        .max(1);
    let target_chunks = workers * 8;
    let chunk_len = defs.len().div_ceil(target_chunks).max(1);
    let mut work: Vec<(&[DefSite], Analysis)> = Vec::new();
    for chunk in defs.chunks(chunk_len) {
        work.push((chunk, host.analysis()));
    }

    let vfs_ref = &vfs;
    let root_ref = root_str.as_str();
    let per_chunk: Vec<Vec<ScipRow>> = gatling_map_owned(work, move |(chunk, snapshot)| {
        let mut chunk_rows: Vec<ScipRow> = Vec::new();
        for def in chunk {
            chunk_rows.extend(def_and_refs(&snapshot, vfs_ref, root_ref, def));
        }
        chunk_rows
    });
    let rows: Vec<ScipRow> = per_chunk.into_iter().flatten().collect();

    Ok(ScipScan {
        snapshot_id,
        ts,
        repo: repo.to_string(),
        git_sha: git_sha.to_string(),
        rows,
    })
}

/// SINGLE-PASS resolved-symbol index — the fast replacement for the per-def
/// `find_all_refs` fan-out in [`ingest_in_process`].
///
/// Instead of one whole-workspace search per definition, every workspace file
/// is walked ONCE: each name-ref token is resolved through `goto_definition`
/// (see `refs_in_file`) and emitted as a reference row keyed to its def-site.
/// Definition rows come from `file_structure` and use the same
/// `file#Lline:Ccol` key scheme as the legacy path. The parallel fan-out is over
/// FILES, via the rayon-free `gatling_map_owned`.
///
/// A one-line timing summary (setup / refs / merge / total) is written to
/// stderr before returning.
///
/// # Errors
///
/// Fails when the cargo workspace at `root` cannot be loaded into rust-analyzer.
pub fn ingest_in_process_single_pass(
    root: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    // The load finishes before the reference pass starts, so it may use every
    // core without competing with our own workers.
    let load_config = LoadCargoConfig {
        load_out_dirs_from_check: true,
        with_proc_macro_server: ProcMacroServerChoice::Sysroot,
        prefill_caches: true,
        num_worker_threads: std::thread::available_parallelism().map_or(8, |n| n.get()),
        proc_macro_processes: 0,
    };
    let cargo_config = CargoConfig::default();

    // Clock starts just before the workspace load.
    let t_total = Instant::now();
    let (db, vfs, _proc_macro) =
        load_workspace_at(root, &cargo_config, &load_config, &|_progress| {}).with_context(
            || format!("loading cargo workspace at {} into rust-analyzer", root.display()),
        )?;

    let host = AnalysisHost::with_database(db);
    let analysis = host.analysis();

    // Workspace scope: `.rs` files located under `root`.
    let root_str = root.to_string_lossy().replace('\\', "/");
    let mut files: Vec<(FileId, String)> = Vec::new();
    for (file_id, path) in vfs.iter() {
        match vfs_rel_path(path, &root_str) {
            Some(rel) if rel.ends_with(".rs") => files.push((file_id, rel)),
            _ => {}
        }
    }

    // 1. DEFINITIONS — one row per `file_structure` node, keyed
    //    `file#Lline:Ccol` on the 1-based position of the item's name. Files
    //    whose structure or line index cannot be computed are skipped.
    let struct_cfg = ra_ap_ide::FileStructureConfig { exclude_locals: true };
    let mut def_rows: Vec<ScipRow> = Vec::new();
    for (file_id, file) in &files {
        let nodes = match analysis.file_structure(&struct_cfg, *file_id) {
            Ok(nodes) => nodes,
            Err(_) => continue,
        };
        let line_index = match analysis.file_line_index(*file_id) {
            Ok(index) => index,
            Err(_) => continue,
        };
        def_rows.extend(nodes.into_iter().map(|node| {
            let name_at = line_index.line_col(node.navigation_range.start());
            let span_first = line_index.line_col(node.node_range.start());
            let span_last = line_index.line_col(node.node_range.end());
            let (line, col) = (name_at.line + 1, name_at.col + 1);
            let kind = format!("{:?}", node.kind);
            ScipRow {
                symbol: format!("{}#L{}:C{}", file, line, col),
                role: "definition".to_string(),
                is_definition: true,
                display_name: node.label,
                kind,
                file: file.clone(),
                start_line: line,
                start_col: col,
                enc_start_line: span_first.line + 1,
                enc_end_line: span_last.line + 1,
            }
        }));
    }

    // Everything up to here (workspace load + def collection) counts as setup.
    let setup_secs = t_total.elapsed().as_secs_f64();
    let n_defs = def_rows.len();

    // 2. REFERENCES — parallel over file chunks. `Analysis` is `Send + !Sync`, so
    //    every chunk is paired with its own snapshot, minted here on the main
    //    thread and moved into the worker. 8 chunks per core give the
    //    work-stealing map room to even out the per-file cost skew.
    let t_refs = Instant::now();
    let workers = std::thread::available_parallelism()
        .map(|w| w.get())
        .unwrap_or(1)
        .max(1);
    let target_chunks = workers * 8;
    let chunk_len = files.len().div_ceil(target_chunks).max(1);
    let mut work: Vec<(&[(FileId, String)], Analysis)> = Vec::new();
    for chunk in files.chunks(chunk_len) {
        work.push((chunk, host.analysis()));
    }

    let vfs_ref = &vfs;
    let root_ref = root_str.as_str();
    let per_chunk: Vec<Vec<ScipRow>> = gatling_map_owned(work, move |(chunk, snapshot)| {
        let mut chunk_rows: Vec<ScipRow> = Vec::new();
        for (file_id, file) in chunk {
            chunk_rows.extend(refs_in_file(&snapshot, vfs_ref, root_ref, *file_id, file));
        }
        chunk_rows
    });
    let ref_rows: Vec<ScipRow> = per_chunk.into_iter().flatten().collect();
    let refs_secs = t_refs.elapsed().as_secs_f64();
    let n_refs = ref_rows.len();

    // 3. MERGE — definitions first, then every reference row.
    let t_merge = Instant::now();
    let mut rows = def_rows;
    rows.extend(ref_rows);
    let merge_secs = t_merge.elapsed().as_secs_f64();

    eprintln!(
        "scip single-pass: setup(load+defs) {setup_secs:.1}s ({n_defs} defs) | \
         refs {refs_secs:.1}s ({n_refs} refs) | merge {merge_secs:.3}s | \
         {} rows | total {:.1}s",
        rows.len(),
        t_total.elapsed().as_secs_f64(),
    );

    Ok(ScipScan {
        snapshot_id,
        ts,
        repo: repo.to_string(),
        git_sha: git_sha.to_string(),
        rows,
    })
}

/// Resolve every `NameRef` identifier in one file to its definition and emit a
/// reference row keyed to that definition's `file#Lline:Ccol` def-site.
///
/// - `file_id` / `occ_file`: the file being scanned and its workspace-relative
///   path (written into each row's `file`).
/// - `vfs` / `root_str`: used to turn the definition's file into a
///   workspace-relative path; definitions that do not map to a path under the
///   root are dropped.
///
/// Tokens that cannot be resolved are skipped. The whole scan runs under
/// `catch_unwind`, so a panic inside rust-analyzer while processing this file
/// yields an empty result for the file instead of aborting the ingest.
fn refs_in_file(
    analysis: &Analysis,
    vfs: &Vfs,
    root_str: &str,
    file_id: FileId,
    occ_file: &str,
) -> Vec<ScipRow> {
    use ra_ap_syntax::{ast, AstNode};

    let scan = || -> Vec<ScipRow> {
        let Ok(tree) = analysis.parse(file_id) else { return Vec::new() };
        let Ok(use_lines) = analysis.file_line_index(file_id) else { return Vec::new() };
        let goto_cfg = ra_ap_ide::GotoDefinitionConfig {
            ra_fixture: ra_ap_ide::RaFixtureConfig::default(),
        };

        let rows: Vec<ScipRow> = tree
            .syntax()
            .descendants()
            .filter_map(ast::NameRef::cast)
            .filter_map(|name_ref| {
                // Query position: start of the identifier token.
                let ident = name_ref.ident_token()?;
                let use_off = ident.text_range().start();
                let at = ra_ap_ide::FilePosition { file_id, offset: use_off };

                // Only the first navigation target is used as the definition.
                let targets = analysis.goto_definition(at, &goto_cfg).ok()??.info;
                let target = targets.into_iter().next()?;

                // `None` here means the definition lives outside the workspace root.
                let def_file = vfs_rel_path(vfs.file_path(target.file_id), root_str)?;
                let def_lines = analysis.file_line_index(target.file_id).ok()?;
                // Prefer the name's own range; fall back to the whole item.
                let def_off = target.focus_range.unwrap_or(target.full_range).start();
                let def_at = def_lines.line_col(def_off);
                let use_at = use_lines.line_col(use_off);

                Some(ScipRow {
                    // Same `file#Lline:Ccol` (1-based) scheme the definition rows use.
                    symbol: format!("{}#L{}:C{}", def_file, def_at.line + 1, def_at.col + 1),
                    // No reference category is available on this path, so every
                    // use-site is labelled as a plain reference.
                    role: "reference".to_string(),
                    is_definition: false,
                    display_name: String::new(),
                    kind: String::new(),
                    file: occ_file.to_string(),
                    start_line: use_at.line + 1,
                    start_col: use_at.col + 1,
                    // Enclosing range is only recorded on definition rows.
                    enc_start_line: 0,
                    enc_end_line: 0,
                })
            })
            .collect();
        rows
    };

    std::panic::catch_unwind(std::panic::AssertUnwindSafe(scan)).unwrap_or_default()
}

/// Produce the rows for one def-site: its definition row first, followed by one
/// row per reference that `find_all_refs` reports for it.
///
/// All rows share the def-site key `file#Lline:Ccol`. References in files that
/// do not map to a path under `root_str`, or whose line index is unavailable,
/// are skipped. If the reference query fails or finds nothing, only the
/// definition row is returned.
fn def_and_refs(
    analysis: &Analysis,
    vfs: &Vfs,
    root_str: &str,
    def: &DefSite,
) -> Vec<ScipRow> {
    // Every reference row is attributed to exactly this key.
    let key = format!("{}#L{}:C{}", def.file, def.line, def.col);

    let mut rows = vec![ScipRow {
        symbol: key.clone(),
        role: "definition".to_string(),
        is_definition: true,
        display_name: def.label.clone(),
        kind: def.kind.clone(),
        file: def.file.clone(),
        start_line: def.line,
        start_col: def.col,
        enc_start_line: def.enc_start_line,
        enc_end_line: def.enc_end_line,
    }];

    // Whole-workspace search (no scope restriction), imports and tests included.
    let query_at = ra_ap_ide::FilePosition { file_id: def.file_id, offset: def.offset };
    let cfg = ra_ap_ide::FindAllRefsConfig {
        search_scope: None,
        ra_fixture: ra_ap_ide::RaFixtureConfig::default(),
        exclude_imports: false,
        exclude_tests: false,
    };

    if let Ok(Some(results)) = analysis.find_all_refs(query_at, &cfg) {
        for (hit_file, hits) in results.into_iter().flat_map(|res| res.references) {
            let Some(rel) = vfs_rel_path(vfs.file_path(hit_file), root_str) else { continue };
            let Ok(lines) = analysis.file_line_index(hit_file) else { continue };
            rows.extend(hits.into_iter().map(|(range, category)| {
                let at = lines.line_col(range.start());
                ScipRow {
                    symbol: key.clone(),
                    role: ref_category_label(category),
                    is_definition: false,
                    display_name: def.label.clone(),
                    kind: String::new(),
                    file: rel.clone(),
                    start_line: at.line + 1,
                    start_col: at.col + 1,
                    // Enclosing range is only recorded on the definition row.
                    enc_start_line: 0,
                    enc_end_line: 0,
                }
            }));
        }
    }
    rows
}

/// Translate ra-ap's `ReferenceCategory` bitflags into the warehouse role label,
/// using the SCIP-path vocabulary.
///
/// - no known flag set → `"reference"`
/// - exactly one flag → `"write"`, `"read"`, `"import"` or `"test"`
/// - several flags → the labels joined with `+`, always in the order
///   write, read, import, test (e.g. `"write+read"`)
fn ref_category_label(cat: ra_ap_ide::ReferenceCategory) -> String {
    use ra_ap_ide::ReferenceCategory as C;

    const NAMES: [&str; 4] = ["write", "read", "import", "test"];
    let set = [
        cat.contains(C::WRITE),
        cat.contains(C::READ),
        cat.contains(C::IMPORT),
        cat.contains(C::TEST),
    ];
    // Lazily yields the labels of the flags that are set, in canonical order.
    let mut active = NAMES
        .iter()
        .zip(set)
        .filter(|(_, on)| *on)
        .map(|(name, _)| *name);

    // Called once per emitted reference row, so the zero- and one-flag cases
    // return the static label without building an intermediate collection.
    let Some(first) = active.next() else {
        return "reference".to_string();
    };
    let Some(second) = active.next() else {
        return first.to_string();
    };

    // Multi-flag combination: assemble the composite "a+b" label.
    let mut label = String::from(first);
    for name in std::iter::once(second).chain(active) {
        label.push('+');
        label.push_str(name);
    }
    label
}

/// Workspace-relative, forward-slashed path for a vfs entry under `root_str`,
/// or `None` for virtual / out-of-workspace files.
///
/// `root_str` is expected to be the forward-slashed workspace root (callers
/// normalize it before passing it in). The vfs path is rendered to a `String`
/// once; backslashes are rewritten to `/` only when present.
///
/// The root must match on a path-component boundary: with root `/ws/proj`,
/// `/ws/proj/src/lib.rs` yields `src/lib.rs`, while a sibling such as
/// `/ws/proj-macros/src/lib.rs` yields `None`. A bare string-prefix test would
/// accept the sibling and return the bogus relative path `-macros/src/lib.rs`.
fn vfs_rel_path(path: &VfsPath, root_str: &str) -> Option<String> {
    // Virtual (non-filesystem) vfs entries have no on-disk path.
    let p = path.as_path()?;
    let s = p.to_string();
    // Only pay the path-normalizing alloc when there's actually a backslash to
    // replace — on Unix there never is, so skip the wasted `replace` copy.
    let norm: std::borrow::Cow<'_, str> = if s.contains('\\') {
        std::borrow::Cow::Owned(s.replace('\\', "/"))
    } else {
        std::borrow::Cow::Borrowed(s.as_str())
    };
    // `None` when the file is NOT under the workspace root — i.e. a dependency /
    // sysroot source ra loaded for resolution. Falling back to the full path here
    // would pull the entire dependency closure into the ingest.
    let rest = norm.strip_prefix(root_str)?;
    // The prefix must end exactly where a path component ends: the root already
    // ends in `/` (or is empty), or the remainder is empty or starts with `/`.
    let on_component_boundary = root_str.is_empty()
        || root_str.ends_with('/')
        || rest.is_empty()
        || rest.starts_with('/');
    if !on_component_boundary {
        return None;
    }
    Some(rest.trim_start_matches('/').to_string())
}