ripvec-core 4.1.15

Semantic code + document search engine. Cacheless static-embedding + cross-encoder rerank by default; optional ModernBERT/BGE transformer engines with GPU backends. Tree-sitter chunking, hybrid BM25 + PageRank, composable ranking layers.
Documentation
//! Repository-local configuration for ripvec.
//!
//! Discovers `.ripvec/config.toml` files that carry repo-local search settings
//! (extension whitelists, ignore globs). Since v3.0.0 the engine is cacheless,
//! so the cache/model fields (`CacheConfig`) and the `find_repo_config` /
//! `save` / `to_toml` helpers were removed: once the cache code was cut out,
//! those items had no callers outside of tests.

use std::path::{Path, PathBuf};

use serde::{Deserialize, Serialize};

use crate::{Error, Result};

/// Top-level structure for `.ripvec/config.toml`.
///
/// Only the `ignore` section is consumed by the live engine. The former
/// `[cache]` section (model, version, local flag) has no matching field here;
/// because this struct does not opt into `deny_unknown_fields`, serde skips
/// the unknown table silently, so existing config files that still carry
/// `[cache]` continue to parse without error.
#[derive(Debug, Serialize, Deserialize)]
pub struct RepoConfig {
    /// Index ignore settings for this repository.
    ///
    /// Falls back to `IgnoreConfig::default()` when the `[ignore]` table is
    /// absent, and is left out of serialized output when
    /// `IgnoreConfig::is_empty` reports `true`.
    #[serde(default, skip_serializing_if = "IgnoreConfig::is_empty")]
    pub ignore: IgnoreConfig,
}

/// Ignore settings stored in `.ripvec/config.toml`.
///
/// Patterns use `.gitignore` syntax and are matched relative to the repository
/// root. Examples: `"*.jsonl"`, `"docs/generated/**"`, `"!docs/keep.md"`.
///
/// The derived `Default` is an empty pattern list.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct IgnoreConfig {
    /// Additional gitignore-style patterns to exclude from the ripvec index.
    ///
    /// A missing `patterns` key deserializes to an empty list, and an empty
    /// list is omitted when serializing.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub patterns: Vec<String>,
}

impl IgnoreConfig {
    /// Returns `true` when no ignore patterns are configured.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        // Destructure exhaustively so that adding a field to `IgnoreConfig`
        // forces this emptiness check to be revisited.
        let Self { patterns } = self;
        patterns.is_empty()
    }
}

impl RepoConfig {
    /// Deserialize from a TOML string.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Other`] if deserialization fails.
    pub fn from_toml(s: &str) -> Result<Self> {
        toml::from_str(s)
            .map_err(|e| Error::Other(anyhow::anyhow!("failed to deserialize config: {e}")))
    }

    /// Load config from `<path>/config.toml`.
    ///
    /// `path` should be the `.ripvec/` directory.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Io`] if the file cannot be read, or [`Error::Other`]
    /// on parse failure.
    pub fn load(path: &Path) -> Result<Self> {
        let file = path.join("config.toml");
        let contents = std::fs::read_to_string(&file).map_err(|source| Error::Io {
            path: file.display().to_string(),
            source,
        })?;
        Self::from_toml(&contents)
    }
}

/// Search `start` and each of its ancestors for a `.ripvec/config.toml`.
///
/// The walk begins at `start` itself and moves up one parent at a time. It
/// stops at the first directory whose `.ripvec/config.toml` exists and returns
/// that `.ripvec/` directory together with the parsed [`RepoConfig`].
///
/// Returns `None` when no visited directory has a config file. It also returns
/// `None` when the nearest config file exists but fails to load: the load
/// error is discarded and directories further up are not consulted.
///
/// Ancestors are taken from `start` exactly as given; the path is not
/// canonicalized first.
#[must_use]
pub fn find_config(start: &Path) -> Option<(PathBuf, RepoConfig)> {
    // Nearest `.ripvec/` directory that actually contains a `config.toml`.
    let ripvec_dir = start
        .ancestors()
        .map(|dir| dir.join(".ripvec"))
        .find(|candidate| candidate.join("config.toml").exists())?;

    // A config that exists but cannot be loaded ends the search with `None`.
    let config = RepoConfig::load(&ripvec_dir).ok()?;
    Some((ripvec_dir, config))
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `body` to `<root>/.ripvec/config.toml`, creating the directory
    /// first, and return the path of the `.ripvec/` directory.
    fn write_config(root: &Path, body: &str) -> PathBuf {
        let ripvec_dir = root.join(".ripvec");
        std::fs::create_dir_all(&ripvec_dir).expect("mkdir");
        std::fs::write(ripvec_dir.join("config.toml"), body).expect("write");
        ripvec_dir
    }

    #[test]
    fn round_trip_toml_ignore_only() {
        let toml_str = "[ignore]\npatterns = [\"*.jsonl\", \"docs/generated/**\"]\n";
        let cfg = RepoConfig::from_toml(toml_str).expect("deserialize");
        assert_eq!(cfg.ignore.patterns, ["*.jsonl", "docs/generated/**"]);
    }

    #[test]
    fn missing_ignore_section_defaults_to_empty_patterns() {
        // Legacy config files only have [cache]; ignore section absent defaults to empty.
        let cfg_str =
            "[cache]\nlocal = true\nmodel = \"BAAI/bge-small-en-v1.5\"\nversion = \"3\"\n";
        let cfg = RepoConfig::from_toml(cfg_str).expect("deserialize");
        assert!(cfg.ignore.patterns.is_empty());
    }

    #[test]
    fn load_from_disk() {
        let dir = TempDir::new().expect("tempdir");
        let ripvec_dir = write_config(dir.path(), "[ignore]\npatterns = [\"*.log\"]\n");
        let loaded = RepoConfig::load(&ripvec_dir).expect("load");
        assert_eq!(loaded.ignore.patterns, ["*.log"]);
    }

    #[test]
    fn find_config_in_current_dir() {
        let dir = TempDir::new().expect("tempdir");
        let ripvec_dir = write_config(dir.path(), "[ignore]\npatterns = [\"*.tmp\"]\n");
        let (found_dir, cfg) =
            find_config(dir.path()).expect("config in the start directory should be found");
        assert_eq!(found_dir, ripvec_dir);
        assert_eq!(cfg.ignore.patterns, ["*.tmp"]);
    }

    #[test]
    fn find_config_in_parent_dir() {
        let dir = TempDir::new().expect("tempdir");
        let ripvec_dir = write_config(dir.path(), "[ignore]\npatterns = [\"*.tmp\"]\n");
        let subdir = dir.path().join("src").join("foo");
        std::fs::create_dir_all(&subdir).expect("mkdir");
        let (found_dir, cfg) = find_config(&subdir).expect("should walk up to parent .ripvec");
        // The walk must land on the ancestor's `.ripvec`, not merely succeed.
        assert_eq!(found_dir, ripvec_dir);
        assert_eq!(cfg.ignore.patterns, ["*.tmp"]);
    }

    #[test]
    fn find_config_not_found() {
        let dir = TempDir::new().expect("tempdir");
        assert!(find_config(dir.path()).is_none());
    }

    #[test]
    fn find_config_returns_none_for_malformed_config() {
        // A config file that exists but does not parse ends the search with `None`.
        let dir = TempDir::new().expect("tempdir");
        write_config(dir.path(), "[ignore\npatterns = ");
        assert!(find_config(dir.path()).is_none());
    }
}