arity 0.1.0

An LSP, formatter, and linter for R
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
//! Cross-file visibility: which names a file can see from the rest of its
//! project, and which of its own top-level bindings are used elsewhere.
//!
//! Two models, unified here:
//! - **Package** — files under a common package root (a directory with
//!   `DESCRIPTION` + `R/`) share one namespace: R sources them all together, so
//!   every file sees every other file's top-level bindings.
//! - **Scripts** — files relate through explicit `source()` edges. A file sees
//!   the top-level bindings of the files it (transitively) sources.
//!
//! Resolution runs in both directions:
//! - [`FileScope::resolves`] — a free read here may bind in a file we can see
//!   (so it isn't `undefined-symbol`).
//! - [`FileScope::used_elsewhere`] — a top-level binding here may be read by a
//!   file that can see us (so it isn't `unused-binding`).
//!
//! Package authoring (NAMESPACE) is folded into the same two directions:
//! `importFrom(pkg, name)` makes `name` resolve, and `export(name)` marks a
//! top-level binding as used (it's public API).
//!
//! Visibility can be *incomplete* — a `source()` target that can't be resolved
//! (dynamic argument, or a path outside the analyzed set), or a wholesale
//! `import(pkg)` whose exports we can't enumerate. Then
//! [`FileScope::resolution_incomplete`] is set and callers must stay
//! conservative (no `undefined-symbol` findings).

use std::collections::{BTreeSet, HashMap, HashSet};
use std::path::{Path, PathBuf};

use crate::project::source::{SourceEdgeKey, SourceTarget};
use crate::rindex::harvest::parse_namespace;

/// Shared empty name set. [`ProjectScope::for_file`] borrows it when a path has
/// no entry in the resolved maps, so the returned view never needs to allocate.
static EMPTY: BTreeSet<String> = BTreeSet::new();

/// One file's contribution to cross-file resolution.
///
/// This is the per-file input consumed by [`ProjectScope::build`].
#[derive(Debug, Clone)]
pub struct FileFacts {
    /// Path identifying this file. Resolved `source()` targets and package
    /// members are matched against it by exact path equality.
    pub path: PathBuf,
    /// Top-level binding names this file defines
    /// (see [`crate::project::file_exports`]).
    pub exports: BTreeSet<String>,
    /// Names this file reads but does not bind locally
    /// (see [`crate::project::exports::file_free_reads`]).
    pub free_reads: BTreeSet<String>,
    /// Top-level `source()` edges this file declares (range-free). Edges with
    /// `local = TRUE` are ignored by resolution; dynamic targets and targets
    /// outside the analyzed set make the file's visibility incomplete.
    pub source_edges: Vec<SourceEdgeKey>,
    /// The package root this file belongs to, if any. Files sharing a root
    /// share one namespace. `None` means the file takes part in resolution
    /// only through `source()` edges.
    pub package_root: Option<PathBuf>,
}

/// Cross-file visibility computed over a set of files.
///
/// Built by [`ProjectScope::build`] and queried per file through
/// [`ProjectScope::for_file`].
#[derive(Debug, Default)]
pub struct ProjectScope {
    /// Per file: top-level names reachable from the files it can see (with the
    /// file's own bindings removed), plus the names its package's NAMESPACE
    /// imports individually.
    visible: HashMap<PathBuf, BTreeSet<String>>,
    /// Per file: names read by some file that can see it, plus the names its
    /// package's NAMESPACE exports.
    used_by_others: HashMap<PathBuf, BTreeSet<String>>,
    /// Files whose cross-file visibility is incomplete: an unresolved
    /// `source()` (dynamic, or pointing outside the analyzed set) somewhere in
    /// their source closure, or a wholesale `import(pkg)` in their package's
    /// NAMESPACE.
    dynamic: HashSet<PathBuf>,
}

/// One file's view of its project: the names it can see from other files and
/// the names of its own top-level bindings that other files read.
///
/// The view only borrows its two name sets, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct FileScope<'a> {
    visible: &'a BTreeSet<String>,
    used_by_others: &'a BTreeSet<String>,
    /// Cross-file visibility is incomplete — an unresolved `source()` or a
    /// wholesale `import(pkg)` could supply otherwise-unresolved names — so
    /// callers must not flag them.
    pub resolution_incomplete: bool,
}

impl<'a> FileScope<'a> {
    /// Construct a view directly from borrowed visibility sets. Lets the salsa
    /// [`crate::project::Visibility`] memo back a `FileScope` without going
    /// through [`ProjectScope::for_file`].
    pub fn new(
        visible: &'a BTreeSet<String>,
        used_by_others: &'a BTreeSet<String>,
        resolution_incomplete: bool,
    ) -> Self {
        Self {
            visible,
            used_by_others,
            resolution_incomplete,
        }
    }

    /// The names visible to this file from the rest of the project.
    ///
    /// The returned reference borrows the underlying set for `'a`, not this
    /// view, so it stays usable after the `FileScope` itself is dropped.
    pub fn visible_names(&self) -> &'a BTreeSet<String> {
        self.visible
    }

    /// The names of this file's bindings read by some file that can see it.
    ///
    /// Like [`FileScope::visible_names`], the borrow is tied to `'a` rather
    /// than to this view.
    pub fn used_names(&self) -> &'a BTreeSet<String> {
        self.used_by_others
    }

    /// True when `name` is bound at top level in a file visible from here.
    pub fn resolves(&self, name: &str) -> bool {
        self.visible.contains(name)
    }

    /// True when `name` (a top-level binding here) is read by a file that can
    /// see this one — so it isn't unused even if unread locally.
    pub fn used_elsewhere(&self, name: &str) -> bool {
        self.used_by_others.contains(name)
    }
}

impl ProjectScope {
    /// Resolve cross-file relationships for `files`. `namespaces` maps a package
    /// root to its NAMESPACE file contents, when present.
    pub fn build(files: &[FileFacts], namespaces: &HashMap<PathBuf, String>) -> Self {
        let by_path: HashMap<&Path, &FileFacts> =
            files.iter().map(|f| (f.path.as_path(), f)).collect();

        // Package members keyed by root, so package siblings see each other.
        let mut package_members: HashMap<&Path, Vec<&Path>> = HashMap::new();
        for f in files {
            if let Some(root) = &f.package_root {
                package_members
                    .entry(root.as_path())
                    .or_default()
                    .push(f.path.as_path());
            }
        }

        // For each file, the set of *other* files it can see.
        let mut sees: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
        let mut dynamic: HashSet<PathBuf> = HashSet::new();
        for f in files {
            let mut seen: HashSet<PathBuf> = HashSet::new();
            if let Some(root) = &f.package_root {
                for member in &package_members[root.as_path()] {
                    if *member != f.path {
                        seen.insert(member.to_path_buf());
                    }
                }
            }

            let mut unresolved = false;
            let mut visited: HashSet<&Path> = HashSet::from([f.path.as_path()]);
            let mut queue: Vec<&FileFacts> = vec![f];
            while let Some(cur) = queue.pop() {
                for edge in &cur.source_edges {
                    match source_dependency(edge) {
                        Dependency::Skip => {}
                        Dependency::Unresolved => unresolved = true,
                        Dependency::Path(p) => match by_path.get(p) {
                            Some(target) if visited.insert(target.path.as_path()) => {
                                seen.insert(target.path.clone());
                                queue.push(target);
                            }
                            Some(_) => {}
                            // A resolved path to a file we didn't analyze is just
                            // as opaque as a dynamic source.
                            None => unresolved = true,
                        },
                    }
                }
            }

            if unresolved {
                dynamic.insert(f.path.clone());
            }
            sees.insert(f.path.clone(), seen);
        }

        // Derive the two directions from `sees`.
        let mut visible: HashMap<PathBuf, BTreeSet<String>> = HashMap::new();
        let mut used_by_others: HashMap<PathBuf, BTreeSet<String>> = files
            .iter()
            .map(|f| (f.path.clone(), BTreeSet::new()))
            .collect();
        for f in files {
            let mut defs = BTreeSet::new();
            for seen in &sees[&f.path] {
                if let Some(target) = by_path.get(seen.as_path()) {
                    defs.extend(target.exports.iter().cloned());
                }
            }
            // `visible` is strictly cross-file; own bindings resolve locally.
            for name in &f.exports {
                defs.remove(name);
            }
            visible.insert(f.path.clone(), defs);

            // Every file `f` sees contributes `f`'s free reads to that file's
            // "used by others" set.
            for seen in &sees[&f.path] {
                if let Some(used) = used_by_others.get_mut(seen) {
                    used.extend(f.free_reads.iter().cloned());
                }
            }
        }

        // Fold NAMESPACE declarations into the same two directions: imported
        // names resolve (visible), exported names count as used (used_by_others),
        // and a wholesale `import(pkg)` makes resolution incomplete.
        for (root, text) in namespaces {
            let Some(members) = package_members.get(root.as_path()) else {
                continue;
            };
            let object_names: Vec<String> = members
                .iter()
                .filter_map(|m| by_path.get(m))
                .flat_map(|f| f.exports.iter().map(|n| n.to_string()))
                .collect();
            let info = parse_namespace(text, &object_names);
            let exported: BTreeSet<String> = info.exports.iter().cloned().collect();
            let imported: BTreeSet<String> = info.imported_names.iter().cloned().collect();
            let incomplete = !info.imported_packages.is_empty();

            for member in members {
                let path = member.to_path_buf();
                if let Some(used) = used_by_others.get_mut(&path) {
                    used.extend(exported.iter().cloned());
                }
                if let Some(vis) = visible.get_mut(&path) {
                    vis.extend(imported.iter().cloned());
                }
                if incomplete {
                    dynamic.insert(path);
                }
            }
        }

        Self {
            visible,
            used_by_others,
            dynamic,
        }
    }

    /// One file's view of the project. Files not in the analyzed set get an
    /// empty, non-dynamic scope.
    pub fn for_file(&self, path: &Path) -> FileScope<'_> {
        FileScope {
            visible: self.visible.get(path).unwrap_or(&EMPTY),
            used_by_others: self.used_by_others.get(path).unwrap_or(&EMPTY),
            resolution_incomplete: self.dynamic.contains(path),
        }
    }
}

/// How a single `source()` edge affects the sourcing file's cross-file
/// visibility, as classified by `source_dependency`.
enum Dependency<'a> {
    /// Contributes the target file's top-level bindings to global scope.
    /// Holds the resolved target path, borrowed from the edge.
    Path(&'a Path),
    /// Unresolvable (dynamic argument); visibility is incomplete.
    Unresolved,
    /// `local = TRUE`: loads into the calling env, never global scope.
    /// The edge is ignored entirely.
    Skip,
}

fn source_dependency(edge: &SourceEdgeKey) -> Dependency<'_> {
    match &edge.target {
        SourceTarget::Dynamic => Dependency::Unresolved,
        SourceTarget::Path(_) if edge.local => Dependency::Skip,
        SourceTarget::Path(p) => Dependency::Path(p.as_path()),
    }
}

/// Find the nearest enclosing R package root of `path`: the closest ancestor
/// directory that contains both a `DESCRIPTION` file and an `R/` subdirectory.
/// The search starts at `path`'s parent, and it touches the filesystem.
pub fn package_root(path: &Path) -> Option<PathBuf> {
    path.ancestors()
        // `ancestors` yields `path` itself first; only its parents are candidates.
        .skip(1)
        .find(|dir| dir.join("DESCRIPTION").is_file() && dir.join("R").is_dir())
        .map(Path::to_path_buf)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned name set from string literals.
    fn set(names: &[&str]) -> BTreeSet<String> {
        names.iter().map(|n| n.to_string()).collect()
    }

    /// A `source()` edge whose target resolved to `target`.
    fn source_path(target: &str, local: bool) -> SourceEdgeKey {
        SourceEdgeKey {
            target: SourceTarget::Path(PathBuf::from(target)),
            local,
        }
    }

    /// A non-local `source()` edge whose target could not be resolved.
    fn dynamic_edge() -> SourceEdgeKey {
        SourceEdgeKey {
            target: SourceTarget::Dynamic,
            local: false,
        }
    }

    /// Build `FileFacts` with `path`, exports, free reads, source edges, root.
    fn facts(
        path: &str,
        exp: &[&str],
        reads: &[&str],
        edges: Vec<SourceEdgeKey>,
        root: Option<&str>,
    ) -> FileFacts {
        FileFacts {
            path: PathBuf::from(path),
            exports: set(exp),
            free_reads: set(reads),
            source_edges: edges,
            package_root: root.map(PathBuf::from),
        }
    }

    /// The names in `set` as a vector. A `BTreeSet` already iterates in sorted
    /// order, so no extra sort is needed for stable comparisons.
    fn names(set: &BTreeSet<String>) -> Vec<String> {
        set.iter().cloned().collect()
    }

    /// Build a scope with no NAMESPACE data.
    fn build_scope(files: &[FileFacts]) -> ProjectScope {
        ProjectScope::build(files, &HashMap::new())
    }

    /// Build the `root -> NAMESPACE text` map `ProjectScope::build` expects.
    fn namespaces(entries: &[(&str, &str)]) -> HashMap<PathBuf, String> {
        entries
            .iter()
            .map(|(root, text)| (PathBuf::from(*root), text.to_string()))
            .collect()
    }

    #[test]
    fn package_files_share_one_namespace() {
        let files = [
            facts("/pkg/R/a.R", &["foo"], &[], vec![], Some("/pkg")),
            facts("/pkg/R/b.R", &["bar"], &["foo"], vec![], Some("/pkg")),
        ];
        let scope = build_scope(&files);
        // b reads foo, which a defines: resolves cross-file.
        assert!(scope.for_file(Path::new("/pkg/R/b.R")).resolves("foo"));
        // foo is used by b, so a's foo isn't unused.
        assert!(
            scope
                .for_file(Path::new("/pkg/R/a.R"))
                .used_elsewhere("foo")
        );
        // bar is defined by b but read by nobody.
        assert!(
            !scope
                .for_file(Path::new("/pkg/R/b.R"))
                .used_elsewhere("bar")
        );
    }

    #[test]
    fn own_exports_are_not_reported_as_visible() {
        // Both files bind `foo`; each one's own binding resolves locally, so
        // only the sibling's distinct name shows up as cross-file visible.
        let files = [
            facts("/pkg/R/a.R", &["foo", "only_a"], &[], vec![], Some("/pkg")),
            facts("/pkg/R/b.R", &["foo", "only_b"], &[], vec![], Some("/pkg")),
        ];
        let scope = build_scope(&files);
        assert_eq!(
            names(scope.for_file(Path::new("/pkg/R/a.R")).visible_names()),
            vec!["only_b"]
        );
        assert_eq!(
            names(scope.for_file(Path::new("/pkg/R/b.R")).visible_names()),
            vec!["only_a"]
        );
    }

    #[test]
    fn source_closure_is_directional() {
        // a.R sources b.R: a sees bar; b does not see foo.
        let files = [
            facts(
                "/s/a.R",
                &["foo"],
                &["bar"],
                vec![source_path("/s/b.R", false)],
                None,
            ),
            facts("/s/b.R", &["bar"], &[], vec![], None),
        ];
        let scope = build_scope(&files);
        assert!(scope.for_file(Path::new("/s/a.R")).resolves("bar"));
        assert!(!scope.for_file(Path::new("/s/b.R")).resolves("foo"));
        // a reads bar, and a sees b, so b's bar is used elsewhere.
        assert!(scope.for_file(Path::new("/s/b.R")).used_elsewhere("bar"));
        assert!(!scope.for_file(Path::new("/s/a.R")).resolution_incomplete);
    }

    #[test]
    fn source_closure_is_transitive_and_cycle_safe() {
        // a -> b -> c, plus c -> a (cycle). a sees bar + baz.
        let files = [
            facts(
                "/s/a.R",
                &["foo"],
                &[],
                vec![source_path("/s/b.R", false)],
                None,
            ),
            facts(
                "/s/b.R",
                &["bar"],
                &[],
                vec![source_path("/s/c.R", false)],
                None,
            ),
            facts(
                "/s/c.R",
                &["baz"],
                &[],
                vec![source_path("/s/a.R", false)],
                None,
            ),
        ];
        let scope = build_scope(&files);
        assert_eq!(
            names(scope.for_file(Path::new("/s/a.R")).visible_names()),
            vec!["bar", "baz"]
        );
    }

    #[test]
    fn dynamic_source_marks_scope_incomplete() {
        let files = [facts("/s/a.R", &[], &[], vec![dynamic_edge()], None)];
        let scope = build_scope(&files);
        assert!(scope.for_file(Path::new("/s/a.R")).resolution_incomplete);
    }

    #[test]
    fn unresolved_source_propagates_through_the_closure() {
        // a -> b, and b has a dynamic source: whatever b pulls in at runtime is
        // also in scope for a, so both are incomplete.
        let files = [
            facts(
                "/s/a.R",
                &[],
                &[],
                vec![source_path("/s/b.R", false)],
                None,
            ),
            facts("/s/b.R", &["bar"], &[], vec![dynamic_edge()], None),
        ];
        let scope = build_scope(&files);
        let a = scope.for_file(Path::new("/s/a.R"));
        assert!(a.resolution_incomplete);
        // The resolvable part of the closure still contributes names.
        assert!(a.resolves("bar"));
        assert!(scope.for_file(Path::new("/s/b.R")).resolution_incomplete);
    }

    #[test]
    fn source_to_unanalyzed_file_marks_scope_incomplete() {
        let files = [facts(
            "/s/a.R",
            &[],
            &[],
            vec![source_path("/s/missing.R", false)],
            None,
        )];
        let scope = build_scope(&files);
        assert!(scope.for_file(Path::new("/s/a.R")).resolution_incomplete);
    }

    #[test]
    fn local_source_neither_contributes_nor_marks_dynamic() {
        let files = [
            facts(
                "/s/a.R",
                &[],
                &["bar"],
                vec![source_path("/s/b.R", true)],
                None,
            ),
            facts("/s/b.R", &["bar"], &[], vec![], None),
        ];
        let scope = build_scope(&files);
        let a = scope.for_file(Path::new("/s/a.R"));
        assert!(!a.resolves("bar"));
        assert!(!a.resolution_incomplete);
        // A local source doesn't make b's bar "used elsewhere".
        assert!(!scope.for_file(Path::new("/s/b.R")).used_elsewhere("bar"));
    }

    #[test]
    fn unknown_file_gets_empty_complete_scope() {
        // A path outside the analyzed set falls back to empty sets and is not
        // treated as incomplete.
        let files = [facts("/s/a.R", &["foo"], &["bar"], vec![], None)];
        let scope = build_scope(&files);
        let other = scope.for_file(Path::new("/s/other.R"));
        assert!(other.visible_names().is_empty());
        assert!(other.used_names().is_empty());
        assert!(!other.resolution_incomplete);
    }

    #[test]
    fn namespace_export_marks_binding_used() {
        // `foo` is exported, so it isn't unused even though no file reads it.
        let files = [facts("/pkg/R/a.R", &["foo"], &[], vec![], Some("/pkg"))];
        let ns = namespaces(&[("/pkg", "export(foo)\n")]);
        let scope = ProjectScope::build(&files, &ns);
        assert!(
            scope
                .for_file(Path::new("/pkg/R/a.R"))
                .used_elsewhere("foo")
        );
    }

    #[test]
    fn namespace_import_from_resolves_name() {
        let files = [facts("/pkg/R/a.R", &[], &["filter"], vec![], Some("/pkg"))];
        let ns = namespaces(&[("/pkg", "importFrom(dplyr, filter)\n")]);
        let scope = ProjectScope::build(&files, &ns);
        let a = scope.for_file(Path::new("/pkg/R/a.R"));
        assert!(a.resolves("filter"));
        assert!(!a.resolution_incomplete);
    }

    #[test]
    fn namespace_wholesale_import_marks_resolution_incomplete() {
        let files = [facts("/pkg/R/a.R", &[], &["abort"], vec![], Some("/pkg"))];
        let ns = namespaces(&[("/pkg", "import(rlang)\n")]);
        let scope = ProjectScope::build(&files, &ns);
        assert!(
            scope
                .for_file(Path::new("/pkg/R/a.R"))
                .resolution_incomplete
        );
    }
}