arity 0.3.0

An LSP, formatter, and linter for R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
//! `arity lint` driver: walks input paths, parses, builds a semantic model,
//! runs the configured rules, filters suppressed findings, and reports.

use std::collections::HashMap;
use std::fmt;
use std::fs;
use std::path::{Path, PathBuf};

use crate::config::LintConfig;
use crate::file_discovery::{FileDiscoveryError, collect_r_files};
use crate::incremental::{
    Analysis, IncrementalDatabase, IncrementalDb, SourceFile, parsed_tree_root, semantic_model,
};
use crate::project::{
    ExternalResolution, FileScope, Project, ProjectMember, external_resolution, package_root,
    visible_symbols, workspace_project,
};
use crate::rindex::provider::IndexedProvider;
use crate::semantic::SymbolProvider;

use super::diagnostic::Diagnostic;
use super::rules::{ResolvedRules, default_symbol_provider, run_rules};
use super::suppression::SuppressionMap;

/// Outcome of linting a single file, as recorded on its [`LintFileReport`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintStatus {
    /// The file parsed cleanly and no findings survived suppression filtering.
    Clean,
    /// The file parsed cleanly; `count` findings were kept.
    Findings { count: usize },
    /// The file produced `count` parse diagnostics. `check_paths_with_index`
    /// does not run the rules on such a file and reports no lint diagnostics.
    ParseDiagnostics { count: usize },
}

/// Per-file entry of a [`LintResult`].
#[derive(Debug, Clone)]
pub struct LintFileReport {
    /// Path of the linted file, as returned by file discovery.
    pub path: PathBuf,
    /// Summary of what happened for this file.
    pub status: LintStatus,
    /// Findings kept for this file; empty when `status` is
    /// [`LintStatus::ParseDiagnostics`] or [`LintStatus::Clean`].
    pub diagnostics: Vec<Diagnostic>,
}

/// Aggregate result of a multi-file lint run (see [`check_paths_with_index`]).
#[derive(Debug, Clone)]
pub struct LintResult {
    /// Number of distinct file paths tracked during the run, including files
    /// that only produced parse diagnostics.
    pub checked_files: usize,
    /// Total number of kept findings across all cleanly parsed files.
    pub total_findings: usize,
    /// One report per discovered file, in discovery order.
    pub reports: Vec<LintFileReport>,
}

/// Everything that can abort a lint run before (or while) files are checked.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintError {
    /// The caller passed an empty list of input paths.
    MissingPaths,
    /// File discovery succeeded but yielded no files to lint.
    NoRFiles,
    /// An explicitly named input file was rejected by file discovery.
    NonRFilePath { path: PathBuf },
    /// File discovery failed while scanning `path`.
    WalkError { path: PathBuf, message: String },
    /// Reading `path` failed; `source` is the stringified I/O error.
    ReadError { path: PathBuf, source: String },
    /// The configuration named a rule that could not be resolved.
    UnknownRule { rule: String },
}

impl fmt::Display for LintError {
    /// Renders the user-facing message for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed messages need no formatting machinery.
            LintError::MissingPaths => {
                f.write_str("lint requires at least one input path (file or directory)")
            }
            LintError::NoRFiles => f.write_str("no .R files found under the provided input paths"),
            LintError::NonRFilePath { path } => {
                let shown = path.display();
                write!(
                    f,
                    "input file {} is not an .R file; lint only supports .R files",
                    shown
                )
            }
            LintError::WalkError { path, message } => {
                let shown = path.display();
                write!(f, "failed while scanning {}: {message}", shown)
            }
            LintError::ReadError { path, source } => {
                let shown = path.display();
                write!(f, "failed to read {}: {source}", shown)
            }
            LintError::UnknownRule { rule } => write!(f, "unknown lint rule: `{rule}`"),
        }
    }
}

// No underlying `source()` is exposed: wrapped errors are stored as strings.
impl std::error::Error for LintError {}

impl From<FileDiscoveryError> for LintError {
    fn from(value: FileDiscoveryError) -> Self {
        match value {
            FileDiscoveryError::NonRFilePath { path } => Self::NonRFilePath { path },
            FileDiscoveryError::WalkError { path, message } => Self::WalkError { path, message },
        }
    }
}

/// Lint every file discovered under `paths` using the default [`LintConfig`].
///
/// See [`check_paths_with_config`] for the error conditions.
pub fn check_paths(paths: &[PathBuf]) -> Result<LintResult, LintError> {
    let defaults = LintConfig::default();
    check_paths_with_config(paths, &defaults)
}

/// Lint every file discovered under `paths` with an explicit `config` and an
/// empty harvested package index.
///
/// Delegates to [`check_paths_with_index`], which documents the error cases.
pub fn check_paths_with_config(
    paths: &[PathBuf],
    config: &LintConfig,
) -> Result<LintResult, LintError> {
    let no_index = IndexedProvider::empty();
    check_paths_with_index(paths, config, no_index)
}

/// Lint every file discovered under `paths`, like [`check_paths_with_config`],
/// but resolve external symbols against the caller-supplied harvested package
/// index `indexed`. The index is installed into salsa as the HIGH-durability
/// library index and consulted by the [`external_resolution`] query. R's
/// default packages and the bundled CRAN lists are static and need not be
/// supplied.
///
/// # Errors
///
/// * [`LintError::MissingPaths`] when `paths` is empty.
/// * [`LintError::UnknownRule`] for the first rule name that fails to resolve.
/// * [`LintError::NonRFilePath`] / [`LintError::WalkError`] from file discovery.
/// * [`LintError::NoRFiles`] when discovery yields nothing.
/// * [`LintError::ReadError`] when a discovered file cannot be read.
pub fn check_paths_with_index(
    paths: &[PathBuf],
    config: &LintConfig,
    indexed: IndexedProvider,
) -> Result<LintResult, LintError> {
    if paths.is_empty() {
        return Err(LintError::MissingPaths);
    }

    // Only the first unresolved rule name is reported.
    let (rules, unknown) = ResolvedRules::resolve(config.select.as_deref(), &config.ignore);
    if let Some(rule) = unknown.into_iter().next() {
        return Err(LintError::UnknownRule { rule });
    }

    // `?` converts the discovery error through `From<FileDiscoveryError>`.
    let files = collect_r_files(paths)?;
    if files.is_empty() {
        return Err(LintError::NoRFiles);
    }

    let mut db = IncrementalDatabase::default();
    let mut tracked: HashMap<PathBuf, SourceFile> = HashMap::new();
    let mut parse_errors: HashMap<PathBuf, usize> = HashMap::new();

    // Phase 1 — load every file into the database and remember how many parse
    // diagnostics each one produced. Files with parse diagnostics stay tracked;
    // `workspace_project` is what drops them from the cross-file scope.
    for source_path in &files {
        let text = match fs::read_to_string(source_path) {
            Ok(text) => text,
            Err(err) => {
                return Err(LintError::ReadError {
                    path: source_path.clone(),
                    source: err.to_string(),
                });
            }
        };
        let handle = db.upsert_file(source_path, text);
        tracked.insert(source_path.clone(), handle);

        let diag_count = db.parse_diagnostics(handle).len();
        if diag_count > 0 {
            parse_errors.insert(source_path.clone(), diag_count);
        }
    }

    // The library index must be installed before the project is derived,
    // because deriving the project takes a shared borrow of `db`.
    let manifest = db.set_library_index(indexed);

    // Seed the workspace file-set, then derive the interned project from it.
    // `workspace_project` keeps only cleanly-parsing members and reads each
    // package's NAMESPACE.
    db.set_workspace_members(tracked.values().copied().collect(), files.clone());
    let project = workspace_project(&db);

    // Undefined symbols are resolved through `external_resolution`; this
    // provider is only the fallback for rules that read static base-R facts.
    let fallback = default_symbol_provider();

    // Phase 2 — produce one report per discovered file, in discovery order.
    let mut reports = Vec::with_capacity(files.len());
    for path in files {
        let file = tracked[&path];
        let report = match parse_errors.get(&path).copied() {
            // Files that failed to parse are reported but never linted.
            Some(count) => LintFileReport {
                path,
                status: LintStatus::ParseDiagnostics { count },
                diagnostics: Vec::new(),
            },
            None => {
                let visibility = visible_symbols(&db, project, file);
                let file_scope = visibility.scope();
                let resolution = external_resolution(&db, manifest, project, file);
                let kept = lint_parsed_file(
                    &db,
                    file,
                    &path,
                    &rules,
                    &fallback,
                    Some(&file_scope),
                    Some(resolution),
                );
                let status = match kept.len() {
                    0 => LintStatus::Clean,
                    count => LintStatus::Findings { count },
                };
                LintFileReport {
                    path,
                    status,
                    diagnostics: kept,
                }
            }
        };
        reports.push(report);
    }

    // Parse-failure reports carry no diagnostics, so summing over all reports
    // counts exactly the findings kept for cleanly parsed files.
    let total_findings: usize = reports.iter().map(|report| report.diagnostics.len()).sum();

    Ok(LintResult {
        checked_files: tracked.len(),
        total_findings,
        reports,
    })
}

/// Build the interned [`Project`] for a membership snapshot.
///
/// `members` is ordered by path first so that the interned key is
/// deterministic: an unchanged set always maps to the same id, which keeps the
/// project-graph memo alive across body edits. `namespaces` is passed through
/// as given.
fn intern_project<'db>(
    db: &'db dyn IncrementalDb,
    mut members: Vec<ProjectMember>,
    namespaces: Vec<(PathBuf, String)>,
) -> Project<'db> {
    let by_path = |lhs: &ProjectMember, rhs: &ProjectMember| lhs.path.cmp(&rhs.path);
    members.sort_by(by_path);
    Project::new(db, members, namespaces)
}

/// Lint one cleanly-parsed file and return the findings that survive
/// suppression.
///
/// Uses the cached parse tree and semantic model from `db`, runs `rules` with
/// the optional cross-file scope (`project`) and external `resolution`, drops
/// every finding the file's suppression map covers, and stamps each remaining
/// finding with `path`. The caller is responsible for having checked that the
/// file has no parse diagnostics.
fn lint_parsed_file(
    db: &dyn IncrementalDb,
    file: SourceFile,
    path: &Path,
    rules: &ResolvedRules,
    provider: &dyn SymbolProvider,
    project: Option<&FileScope<'_>>,
    resolution: Option<&ExternalResolution>,
) -> Vec<Diagnostic> {
    let root_node = parsed_tree_root(db, file);
    let model = semantic_model(db, file);
    let findings = run_rules(
        &rules.rules,
        path,
        &root_node,
        model,
        provider,
        project,
        resolution,
    );

    let suppress = SuppressionMap::build(&root_node);
    findings
        .into_iter()
        .filter(|finding| !suppress.is_suppressed(finding.rule, finding.range))
        .map(|mut finding| {
            // Every kept finding is attributed to the path the caller passed.
            finding.path = path.to_path_buf();
            finding
        })
        .collect()
}

/// Lint `file`, which must already be tracked in `db`, without any cross-file
/// scope or external resolution, reusing its cached parse and model.
///
/// A file with parse diagnostics yields an empty list. Used by the LSP, which
/// holds a long-lived `db` so edits don't re-parse from scratch.
///
/// # Errors
///
/// [`LintError::UnknownRule`] for the first rule name in `config` that fails
/// to resolve.
pub fn check_tracked_file(
    db: &IncrementalDatabase,
    file: SourceFile,
    path: &Path,
    config: &LintConfig,
    provider: &dyn SymbolProvider,
) -> Result<Vec<Diagnostic>, LintError> {
    let (rules, unknown) = ResolvedRules::resolve(config.select.as_deref(), &config.ignore);
    if let Some(rule) = unknown.into_iter().next() {
        return Err(LintError::UnknownRule { rule });
    }

    let findings = if db.parse_diagnostics(file).is_empty() {
        lint_parsed_file(db, file, path, &rules, provider, None, None)
    } else {
        // Rules are not run on a file that failed to parse.
        Vec::new()
    };
    Ok(findings)
}

/// The write-phase output of cross-file linting: everything [`analyze_prepared`]
/// needs, all derivable with read-only `&db` access afterward. Produced by
/// [`prepare_document_in_project`].
///
/// Splitting the lint into a write-phase ([`prepare_document_in_project`], needs
/// `&mut db`) and a read-phase ([`analyze_prepared`], `&db` only) lets the LSP
/// run the expensive read-phase off its lint thread on a short-lived db clone,
/// where it can be cancelled by a fresher edit (see `src/lsp.rs`).
pub struct PreparedProject {
    /// The tracked file that [`analyze_prepared`] lints.
    active: SourceFile,
    /// Rules resolved from the `LintConfig` passed to the write-phase.
    rules: ResolvedRules,
    /// Cleanly-parsing project members (incl. `active`), with their tracked
    /// inputs and package roots; files with parse diagnostics are dropped, as
    /// before. Plain owned data — *not* an interned [`Project`] — because the
    /// LSP moves this across a thread boundary onto a different db handle and
    /// interns inside the read-phase ([`analyze_prepared`]).
    members: Vec<ProjectMember>,
    /// `(package_root, NAMESPACE text)` pairs, sorted by root.
    namespaces: Vec<(PathBuf, String)>,
}

/// Write-phase of cross-file linting (takes `&mut db`).
///
/// Resolves the rules from `config`, checks that `active` parses cleanly, and
/// snapshots the workspace-derived project membership and `NAMESPACE` texts
/// into a [`PreparedProject`] for the read-only [`analyze_prepared`]. `active`
/// must already be tracked in `db` carrying the live editor buffer, and the
/// workspace file-set must already be seeded by the caller (the LSP's lazy seed
/// or [`seed_workspace_for`]); this function performs no disk walk. `_path` is
/// currently unused.
///
/// Returns `Ok(None)` when the active file has parse diagnostics, in which case
/// the caller publishes no findings.
///
/// # Errors
///
/// [`LintError::UnknownRule`] for the first rule name that fails to resolve.
pub fn prepare_document_in_project(
    db: &mut IncrementalDatabase,
    _path: &Path,
    active: SourceFile,
    config: &LintConfig,
) -> Result<Option<PreparedProject>, LintError> {
    let (rules, unknown) = ResolvedRules::resolve(config.select.as_deref(), &config.ignore);
    if let Some(rule) = unknown.into_iter().next() {
        return Err(LintError::UnknownRule { rule });
    }

    let active_parses_cleanly = db.parse_diagnostics(active).is_empty();
    if !active_parses_cleanly {
        return Ok(None);
    }

    // From here on only shared access is needed. `workspace_project` filters to
    // cleanly-parsing members and reads each package's NAMESPACE; its owned
    // membership is copied out so the read-phase can re-intern it on a db clone
    // and the `Project<'db>` never crosses the thread boundary.
    let shared: &IncrementalDatabase = &*db;
    let project = workspace_project(shared);
    let prepared = PreparedProject {
        active,
        rules,
        members: project.members(shared).clone(),
        namespaces: project.namespaces(shared).clone(),
    };

    Ok(Some(prepared))
}

/// Add the project enclosing `path`, plus `active`, to the salsa
/// [`Workspace`](crate::incremental::Workspace) file-set so that
/// [`prepare_document_in_project`] can derive membership from it.
///
/// The enclosing project is the R package root of `path` when there is one,
/// otherwise the file's parent directory (if it is a directory). That directory
/// is walked once; every discovered sibling other than `path` itself is read
/// from disk and upserted into `db`. Discovery and read failures are skipped
/// silently (best effort). The result is appended to the existing file-set and
/// root list and handed to
/// [`set_workspace_members`](IncrementalDatabase::set_workspace_members), whose
/// conditional behaviour makes a repeat call with an unchanged set a no-op.
///
/// The LSP calls this lazily (only when the active file isn't yet a member), so
/// the walk leaves the per-keystroke path; one-shot callers
/// ([`check_document_in_project`]) call it each time.
// NOTE(review): `active` and siblings are pushed even when already members;
// this presumably relies on `set_workspace_members` normalising duplicates —
// confirm against its implementation.
pub fn seed_workspace_for(db: &mut IncrementalDatabase, path: &Path, active: SourceFile) {
    // Start from whatever the workspace already holds, if it exists.
    let mut files = Vec::new();
    let mut roots = Vec::new();
    if let Some(ws) = db.workspace() {
        files = ws.members(&*db).to_vec();
        roots = ws.roots(&*db).to_vec();
    }
    files.push(active);

    let parent_dir = || {
        path.parent()
            .filter(|parent| parent.is_dir())
            .map(Path::to_path_buf)
    };
    if let Some(dir) = package_root(path).or_else(parent_dir) {
        let siblings = collect_r_files(std::slice::from_ref(&dir)).unwrap_or_default();
        // The active file is skipped so its disk text is not upserted here.
        for sibling in siblings
            .into_iter()
            .filter(|candidate| candidate.as_path() != path)
        {
            if let Ok(text) = fs::read_to_string(&sibling) {
                let handle = db.upsert_file(&sibling, text);
                files.push(handle);
            }
        }

        let already_known = roots.iter().any(|known| known == &dir);
        if !already_known {
            roots.push(dir);
        }
    }

    db.set_workspace_members(files, roots);
}

/// Read-phase of cross-file linting (`&db` only — no disk, no writes). Builds the
/// per-file facts from cached models/trees, assembles the project scope, and
/// lints the active file against it. Safe to run on a db clone; salsa aborts it
/// with [`salsa::Cancelled`] (at the next tracked-query entry) if a write races.
pub fn analyze_prepared(
    analysis: &Analysis,
    prepared: &PreparedProject,
    provider: &dyn SymbolProvider,
) -> Vec<Diagnostic> {
    // One `&dyn IncrementalDb` borrow for the read-phase: a single `'db`
    // lifetime keeps the interned `Project<'db>` and `visible_symbols` aligned.
    let db = analysis.as_db();
    // Intern the project here (read-phase): the membership snapshot is plain
    // owned data in `prepared`, so this is safe on a db clone, and an unchanged
    // set re-interns to the same id — keeping the project-graph memo warm.
    let project = intern_project(db, prepared.members.clone(), prepared.namespaces.clone());
    let active_path = analysis
        .file_path(prepared.active)
        .map(Path::to_path_buf)
        .unwrap_or_default();
    let visibility = visible_symbols(db, project, prepared.active);
    let file_scope = visibility.scope();
    // Resolve undefined symbols through the salsa library index when one is
    // installed (HIGH-durability, so it survives keystrokes); else fall back to
    // the threaded `provider`. The query memoizes and backdates across body edits.
    let resolution = analysis
        .library_index()
        .map(|manifest| external_resolution(db, manifest, project, prepared.active));
    lint_parsed_file(
        db,
        prepared.active,
        &active_path,
        &prepared.rules,
        provider,
        Some(&file_scope),
        resolution,
    )
}

/// Lint `path` with cross-file resolution. `path` must already be tracked in
/// `db` as `active`, carrying the live editor buffer.
///
/// Runs the three steps back to back: seed the workspace
/// ([`seed_workspace_for`]), the write-phase ([`prepare_document_in_project`]),
/// then the read-phase ([`analyze_prepared`]) on a snapshot. Used by the CLI
/// and tests; the LSP drives the phases separately so the read-phase can run
/// cancellably off its lint thread.
///
/// Returns an empty list when the active file has parse diagnostics.
///
/// # Errors
///
/// Propagates [`LintError::UnknownRule`] from the write-phase.
pub fn check_document_in_project(
    db: &mut IncrementalDatabase,
    path: &Path,
    active: SourceFile,
    config: &LintConfig,
    provider: &dyn SymbolProvider,
) -> Result<Vec<Diagnostic>, LintError> {
    seed_workspace_for(db, path, active);

    let prepared = prepare_document_in_project(db, path, active, config)?;
    if let Some(ready) = prepared {
        let analysis = db.snapshot();
        return Ok(analyze_prepared(&analysis, &ready, provider));
    }

    // The write-phase declined: the active file has parse diagnostics.
    Ok(Vec::new())
}

/// Lint one in-memory document, given its `path` and `content`, with the
/// default symbol provider.
///
/// Intended for quick fixes and tests: it goes through a one-shot database.
/// The LSP's hot lint path uses [`check_tracked_file`] against its persistent
/// database instead.
pub fn check_document(
    path: &Path,
    content: &str,
    config: &LintConfig,
) -> Result<Vec<Diagnostic>, LintError> {
    let provider = default_symbol_provider();
    check_document_with_provider(path, content, config, &provider)
}

/// Lint one in-memory document like [`check_document`], but with a
/// caller-supplied symbol `provider`.
///
/// Builds a fresh database, adds `content` to it as a single file, and runs
/// [`check_tracked_file`] on that file.
pub fn check_document_with_provider(
    path: &Path,
    content: &str,
    config: &LintConfig,
    provider: &dyn SymbolProvider,
) -> Result<Vec<Diagnostic>, LintError> {
    let one_shot = IncrementalDatabase::default();
    let tracked = one_shot.add_file(content.to_owned());
    check_tracked_file(&one_shot, tracked, path, config, provider)
}