// arity 0.1.0
//
// An LSP, formatter, and linter for R
// Documentation
//! Symbol resolution against package namespaces.
//!
//! The default `StaticBaseR` provider knows the exports of R's seven default
//! packages (`base`, `utils`, `stats`, `methods`, `datasets`, `grDevices`,
//! `graphics`) — the same set R attaches on startup before any `.Rprofile` or
//! `library()` call. Symbol lists are baked in via `include_str!` from
//! `src/semantic/base_r/*.txt`, generated by `scripts/dump_base_symbols.R`.
//!
//! Non-default packages discovered via `library()` calls resolve against
//! [`BundledPackages`] — names-only export lists for the top-N CRAN packages by
//! download count, baked in via `include_str!` from
//! `src/semantic/cran/exports.txt` (generated by `scripts/dump_cran_symbols.R`,
//! ranked by `scripts/rank_cran_downloads.sh`). Packages outside that set still
//! resolve as [`PackageOrigin::Unknown`] unless locally harvested.

use std::collections::{HashMap, HashSet};
use std::sync::LazyLock;

use rowan::TextRange;
use smol_str::SmolStr;

// Canonical spellings of R's default package names. Each constant is
// referenced by both `DEFAULT_PACKAGES` and `PACKAGE_LISTS`, so the name is
// written out exactly once.
const PACKAGE_BASE: &str = "base";
const PACKAGE_UTILS: &str = "utils";
const PACKAGE_STATS: &str = "stats";
const PACKAGE_METHODS: &str = "methods";
const PACKAGE_DATASETS: &str = "datasets";
const PACKAGE_GRDEVICES: &str = "grDevices";
const PACKAGE_GRAPHICS: &str = "graphics";

/// The seven default packages, returned verbatim by [`default_packages`] and
/// consulted by `StaticBaseR::package_indexed`.
const DEFAULT_PACKAGES: &[&str] = &[
    PACKAGE_BASE,
    PACKAGE_UTILS,
    PACKAGE_STATS,
    PACKAGE_METHODS,
    PACKAGE_DATASETS,
    PACKAGE_GRDEVICES,
    PACKAGE_GRAPHICS,
];

/// A `library()` / `require()` / `requireNamespace()` call discovered in the
/// file, in source order.
///
/// Slices of this type are what [`SymbolProvider::origin`] receives as its
/// load-ordered list of attached packages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoadedPackage {
    /// Name of the package the call refers to.
    pub name: SmolStr,
    /// Source range associated with the call.
    ///
    /// NOTE(review): whether this spans the whole call or only the package
    /// argument is decided by the code that builds this struct, which is not
    /// visible here — confirm before relying on it.
    pub range: TextRange,
}

/// Where a bare function/identifier name resolves to within the attached
/// packages. Mirrors jarl's enum of the same name.
///
/// This is the result type of [`SymbolProvider::origin`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackageOrigin {
    /// Exactly one attached package exports this name; the payload is that
    /// package's name.
    Resolved(SmolStr),
    /// Multiple attached packages export this name. The vec is in load order
    /// (first attached first); the *last* element is the package that masks
    /// the others under R's standard lookup rules.
    Ambiguous(Vec<SmolStr>),
    /// No attached package is known to export this name. "Known" matters: a
    /// provider that lacks export data for an attached package reports this
    /// variant too, so it does not by itself prove the name is undefined.
    Unknown,
}

/// A source of knowledge about which packages export which names.
///
/// Implementors must be `Send + Sync`. Two methods are required
/// ([`origin`](SymbolProvider::origin) and
/// [`is_base`](SymbolProvider::is_base));
/// [`package_indexed`](SymbolProvider::package_indexed) has a conservative
/// default that implementors override when they hold complete export data.
pub trait SymbolProvider: Send + Sync {
    /// Resolve a bare name against a load-ordered list of attached packages.
    ///
    /// The provider should consider both R's seven default packages (always
    /// attached) and any packages listed in `loaded`, treating the default
    /// packages as attached *before* anything in `loaded`.
    fn origin(&self, name: &str, loaded: &[LoadedPackage]) -> PackageOrigin;

    /// True when `name` is exported by one of R's default packages.
    fn is_base(&self, name: &str) -> bool;

    /// True if this provider has *full* export knowledge for `pkg` — i.e. an
    /// unresolved name attributed to `pkg` is genuinely undefined, not merely
    /// un-indexed. Default packages always qualify; installed packages qualify
    /// once harvested into the index. Default: `false`.
    fn package_indexed(&self, pkg: &str) -> bool {
        // The default claims no knowledge of any package; `pkg` is discarded
        // explicitly so the parameter keeps its descriptive name.
        let _ = pkg;
        false
    }
}

/// Static symbol provider backed by the baked-in default-package export lists.
///
/// Both fields are filled once by `StaticBaseR::new` from `PACKAGE_LISTS` and
/// are only read afterwards by the `SymbolProvider` implementation.
#[derive(Debug)]
pub struct StaticBaseR {
    /// Maps a symbol → the list of default packages that export it. Most
    /// symbols are exported by exactly one package; a handful (e.g. `body`
    /// from base and methods) are exported by more. Packages appear in the
    /// order their lists occur in `PACKAGE_LISTS`.
    name_to_packages: HashMap<SmolStr, Vec<SmolStr>>,
    /// Set of all names exported by any default package, for fast `is_base`.
    base_names: HashSet<SmolStr>,
}

/// `StaticBaseR::default()` is the same as `StaticBaseR::new()`.
impl Default for StaticBaseR {
    /// Delegates to [`StaticBaseR::new`], which indexes the baked-in lists.
    fn default() -> Self {
        Self::new()
    }
}

impl StaticBaseR {
    /// Builds the provider by indexing every export list in `PACKAGE_LISTS`.
    ///
    /// Each list holds one symbol per line. Lines are trimmed and blank lines
    /// are ignored. Every symbol is recorded in `base_names`, and its owning
    /// package is appended to that symbol's entry in `name_to_packages`, so a
    /// symbol exported by several packages lists them in table order.
    pub fn new() -> Self {
        let mut provider = Self {
            name_to_packages: HashMap::new(),
            base_names: HashSet::new(),
        };

        for (package, exports) in PACKAGE_LISTS.iter().copied() {
            let package = SmolStr::new(package);
            let symbols = exports
                .lines()
                .map(str::trim)
                .filter(|symbol| !symbol.is_empty());

            for symbol in symbols {
                let symbol = SmolStr::new(symbol);
                provider.base_names.insert(symbol.clone());
                provider
                    .name_to_packages
                    .entry(symbol)
                    .or_default()
                    .push(package.clone());
            }
        }

        provider
    }
}

impl SymbolProvider for StaticBaseR {
    /// Looks `name` up in the default-package index only.
    ///
    /// Returns `Unknown` when no default package exports the name, `Resolved`
    /// when exactly one does, and `Ambiguous` (in index order) when several do.
    fn origin(&self, name: &str, loaded: &[LoadedPackage]) -> PackageOrigin {
        // Non-default `library()` calls add nothing this pass — no manifest yet.
        let _ = loaded;

        let exporters = self.name_to_packages.get(name).map(Vec::as_slice);
        match exporters {
            None | Some([]) => PackageOrigin::Unknown,
            Some([only]) => PackageOrigin::Resolved(only.clone()),
            Some(several) => PackageOrigin::Ambiguous(several.to_vec()),
        }
    }

    /// True when any default package's baked-in list contains `name`.
    fn is_base(&self, name: &str) -> bool {
        self.base_names.contains(name)
    }

    /// True only for the seven default packages, whose exports are fully
    /// known via the baked-in lists. The comparison is exact and
    /// case-sensitive.
    fn package_indexed(&self, pkg: &str) -> bool {
        DEFAULT_PACKAGES.iter().any(|&known| known == pkg)
    }
}

/// Returns the names of R's seven default packages, in the order they are
/// declared in `DEFAULT_PACKAGES`.
pub fn default_packages() -> &'static [&'static str] {
    DEFAULT_PACKAGES
}

/// Names-only export lists for the top-N CRAN packages by download count,
/// baked in from `cran/exports.txt` and parsed once.
///
/// This is the lowest-precision tier in the resolution stack: locally harvested
/// packages (version-exact) and the default base packages both take precedence.
/// It exists so `undefined-symbol` can resolve `library()`-attached packages
/// that aren't installed, without the conservative whole-file suppression.
///
/// The struct owns no data: it holds a `'static` reference to the shared
/// parsed table.
#[derive(Debug)]
pub struct BundledPackages {
    /// package → set of exported names.
    exports: &'static HashMap<SmolStr, HashSet<SmolStr>>,
}

/// Process-wide parsed form of `cran/exports.txt`. The file's text is embedded
/// at compile time via `include_str!`; `LazyLock` defers the `parse_bundled`
/// call until the table is first dereferenced.
static BUNDLED_EXPORTS: LazyLock<HashMap<SmolStr, HashSet<SmolStr>>> =
    LazyLock::new(|| parse_bundled(include_str!("cran/exports.txt")));

/// Parses the sectioned `cran/exports.txt` format into a package → exports map.
///
/// Every line is trimmed first. Blank lines and lines starting with `#` are
/// skipped. A line of the form `[pkg]` opens (or re-opens) the section for
/// `pkg`, registering the package even if no exports follow. Any other line is
/// added to the currently open section; lines seen before the first section
/// header are dropped.
fn parse_bundled(text: &str) -> HashMap<SmolStr, HashSet<SmolStr>> {
    let mut exports_by_package: HashMap<SmolStr, HashSet<SmolStr>> = HashMap::new();
    let mut open_section: Option<SmolStr> = None;

    let meaningful = text
        .lines()
        .map(str::trim)
        .filter(|entry| !entry.is_empty() && !entry.starts_with('#'));

    for entry in meaningful {
        let header = entry
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'));

        match header {
            Some(package) => {
                let package = SmolStr::new(package);
                // Register the package up front so an empty section still counts.
                exports_by_package.entry(package.clone()).or_default();
                open_section = Some(package);
            }
            None => {
                let Some(package) = open_section.as_ref() else {
                    continue;
                };
                // The open section's key was inserted when its header was read.
                if let Some(names) = exports_by_package.get_mut(package) {
                    names.insert(SmolStr::new(entry));
                }
            }
        }
    }

    exports_by_package
}

/// `BundledPackages::default()` is the same as `BundledPackages::new()`.
impl Default for BundledPackages {
    /// Delegates to [`BundledPackages::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl BundledPackages {
    pub fn new() -> Self {
        Self {
            exports: &BUNDLED_EXPORTS,
        }
    }

    /// True if `package` is in the bundled set.
    pub fn has_package(&self, package: &str) -> bool {
        self.exports.contains_key(package)
    }

    /// True if the bundled list for `package` includes `name`.
    pub fn exports(&self, package: &str, name: &str) -> bool {
        self.exports
            .get(package)
            .is_some_and(|set| set.contains(name))
    }
}

/// `(package name, newline-separated export list)` pairs for the default
/// packages, with each list embedded at compile time via `include_str!`.
///
/// NOTE(review): `StaticBaseR::new` walks this table in order, so the order
/// here decides the order of packages inside `PackageOrigin::Ambiguous`, whose
/// last element is documented as the masking package. Confirm this order
/// matches R's attach order for the default packages.
const PACKAGE_LISTS: &[(&str, &str)] = &[
    (PACKAGE_BASE, include_str!("base_r/base.txt")),
    (PACKAGE_UTILS, include_str!("base_r/utils.txt")),
    (PACKAGE_STATS, include_str!("base_r/stats.txt")),
    (PACKAGE_METHODS, include_str!("base_r/methods.txt")),
    (PACKAGE_DATASETS, include_str!("base_r/datasets.txt")),
    (PACKAGE_GRDEVICES, include_str!("base_r/grDevices.txt")),
    (PACKAGE_GRAPHICS, include_str!("base_r/graphics.txt")),
];

// Unit tests for both providers. They run against the real baked-in export
// lists, so they also act as a smoke test that the generated `.txt` files
// contain the expected well-known names.
#[cfg(test)]
mod tests {
    use super::*;

    // `is_base` recognises a few names exported by the `base` package.
    #[test]
    fn knows_common_base_names() {
        let p = StaticBaseR::new();
        assert!(p.is_base("c"));
        assert!(p.is_base("length"));
        assert!(p.is_base("print"));
    }

    // A name exported by exactly one default package resolves to it.
    #[test]
    fn resolves_base_function() {
        let p = StaticBaseR::new();
        match p.origin("length", &[]) {
            PackageOrigin::Resolved(pkg) => assert_eq!(pkg.as_str(), "base"),
            other => panic!("expected Resolved(base), got {other:?}"),
        }
    }

    // A name absent from every list yields `Unknown`.
    #[test]
    fn returns_unknown_for_unknown_name() {
        let p = StaticBaseR::new();
        assert_eq!(
            p.origin("not_a_real_symbol_xyz", &[]),
            PackageOrigin::Unknown
        );
    }

    // `is_base` covers default packages other than `base` itself.
    #[test]
    fn knows_stats_function() {
        let p = StaticBaseR::new();
        assert!(p.is_base("lm"));
    }

    // Dataset names count as exports of `datasets`.
    #[test]
    fn knows_datasets_lazydata() {
        let p = StaticBaseR::new();
        // `iris` lives in datasets via lazy-loaded data.
        assert!(p.is_base("iris"));
    }

    // A bundled package is present and answers export queries both ways.
    #[test]
    fn bundled_knows_curated_package() {
        let b = BundledPackages::new();
        assert!(b.has_package("data.table"));
        assert!(b.exports("data.table", "fread"));
        assert!(!b.exports("data.table", "definitely_not_a_real_export"));
    }

    // Queries about a package outside the bundled set return `false`.
    #[test]
    fn bundled_unknown_package_is_absent() {
        let b = BundledPackages::new();
        assert!(!b.has_package("not_a_real_package_xyz"));
        assert!(!b.exports("not_a_real_package_xyz", "anything"));
    }

    // The bundled tier and the default-package tier stay separate.
    #[test]
    fn bundled_names_are_not_base() {
        // A bundled-only export must not be reported as base R.
        let base = StaticBaseR::new();
        let bundled = BundledPackages::new();
        assert!(bundled.exports("rlang", "abort"));
        assert!(!base.is_base("abort"));
    }
}