vyctor 0.1.0

A fast CLI tool for semantic file search using vector embeddings
Documentation
//! Configuration management for Vyctor
//!
//! Handles loading, saving, and validating the `vyctor.config.toml` file.

mod schema;

#[allow(unused_imports)]
pub use schema::{
    EmbeddingConfig, EmbeddingProvider, IndexingConfig, RerankerConfig, RerankerProviderType,
    VyctorConfig, WatchConfig,
};

use anyhow::{Context, Result};
use std::path::{Path, PathBuf};

/// Name of the vyctor data directory, joined onto the project root by
/// [`vyctor_dir`]. Holds the database and other non-tracked files.
pub const VYCTOR_DIR: &str = ".vyctor";
/// Name of the config file, joined onto the project root by [`config_path`]
/// (lives in the project root and is tracked by git).
pub const CONFIG_FILE: &str = "vyctor.config.toml";
/// Name of the database file, joined onto the vyctor directory by [`db_path`].
pub const DB_FILE: &str = "index.duckdb";

/// Locate the vyctor project root, beginning the upward search at the
/// process's current working directory.
///
/// # Errors
///
/// Fails if the current directory cannot be determined, or if
/// [`find_vyctor_root_from`] finds no project marker.
pub fn find_vyctor_root() -> Result<PathBuf> {
    let cwd = std::env::current_dir()?;
    find_vyctor_root_from(cwd.as_path())
}

/// Locate the vyctor project root by inspecting `start` and then each of its
/// ancestors, nearest first.
///
/// A directory qualifies when it contains either a `vyctor.config.toml` file
/// (the preferred marker) or a `.vyctor` directory (kept for backwards
/// compatibility).
///
/// # Errors
///
/// Fails when neither marker is found in `start` or any of its ancestors.
pub fn find_vyctor_root_from(start: &Path) -> Result<PathBuf> {
    // `is_file` / `is_dir` already report `false` for paths that do not exist.
    let is_project_root =
        |dir: &Path| dir.join(CONFIG_FILE).is_file() || dir.join(VYCTOR_DIR).is_dir();

    match start.ancestors().find(|&dir| is_project_root(dir)) {
        Some(root) => Ok(root.to_path_buf()),
        None => anyhow::bail!("No vyctor.config.toml found. Run 'vyctor init' to initialize."),
    }
}

/// Path of the `.vyctor` directory inside the discovered project root.
///
/// # Errors
///
/// Fails when [`find_vyctor_root`] fails.
pub fn vyctor_dir() -> Result<PathBuf> {
    find_vyctor_root().map(|root| root.join(VYCTOR_DIR))
}

/// Path of `vyctor.config.toml` inside the discovered project root.
///
/// # Errors
///
/// Fails when [`find_vyctor_root`] fails.
pub fn config_path() -> Result<PathBuf> {
    find_vyctor_root().map(|root| root.join(CONFIG_FILE))
}

/// Path of the database file inside the `.vyctor` directory.
///
/// # Errors
///
/// Fails when [`vyctor_dir`] fails.
pub fn db_path() -> Result<PathBuf> {
    vyctor_dir().map(|dir| dir.join(DB_FILE))
}

/// Read, parse and validate the project's configuration file.
///
/// # Errors
///
/// Fails when the config path cannot be resolved, the file cannot be read,
/// its contents are not valid TOML for [`VyctorConfig`], or the parsed
/// configuration is rejected by `validate`.
pub fn load_config() -> Result<VyctorConfig> {
    let config_file = config_path()?;

    let raw = std::fs::read_to_string(&config_file)
        .with_context(|| format!("Failed to read config file: {}", config_file.display()))?;

    let parsed = toml::from_str::<VyctorConfig>(&raw)
        .with_context(|| format!("Failed to parse config file: {}", config_file.display()))?;

    parsed.validate()?;
    Ok(parsed)
}

/// Serialize `config` as pretty-printed TOML and write it to the project's
/// configuration file, replacing the previous contents.
///
/// # Errors
///
/// Fails when the config path cannot be resolved, serialization fails, or the
/// file cannot be written.
#[allow(dead_code)]
pub fn save_config(config: &VyctorConfig) -> Result<()> {
    let target = config_path()?;
    let serialized = toml::to_string_pretty(config).context("Failed to serialize config")?;

    std::fs::write(&target, serialized)
        .with_context(|| format!("Failed to write config file: {}", target.display()))
}

/// Initialize a new vyctor project under `root` with the default configuration.
///
/// Creates the `.vyctor` directory, writes the commented default
/// `vyctor.config.toml` into `root`, makes sure `.gitignore` ignores
/// `.vyctor/`, and returns the parsed default configuration.
///
/// When `force` is `true` an existing config file is overwritten.
///
/// # Errors
///
/// Fails when:
/// - the config file already exists and `force` is `false`;
/// - the generated template does not parse as a [`VyctorConfig`] (checked
///   before anything is written);
/// - the `.vyctor` directory or the config file cannot be created/written;
/// - `.gitignore` cannot be updated.
pub fn init_vyctor_dir(root: &Path, force: bool) -> Result<VyctorConfig> {
    let vyctor_dir = root.join(VYCTOR_DIR);
    let config_path = root.join(CONFIG_FILE);

    if config_path.exists() && !force {
        anyhow::bail!("vyctor.config.toml already exists. Use --force to reinitialize.");
    }

    // Render the template once and parse it *before* touching the filesystem,
    // so a broken template can never replace an existing, working config.
    let contents = generate_default_config_template();
    let config: VyctorConfig =
        toml::from_str(&contents).context("Failed to parse generated config")?;

    // Create the .vyctor directory (for database and other non-tracked files)
    std::fs::create_dir_all(&vyctor_dir).with_context(|| {
        format!(
            "Failed to create .vyctor directory: {}",
            vyctor_dir.display()
        )
    })?;

    // Write exactly the text that was just parsed.
    std::fs::write(&config_path, &contents)
        .with_context(|| format!("Failed to write config file: {}", config_path.display()))?;

    // Update .gitignore (only .vyctor/ directory, not the config)
    update_gitignore(root)?;

    Ok(config)
}

/// Generate the default config file contents, including explanatory comments.
///
/// The only dynamic value is `[embedding.local].cache_dir`: the platform cache
/// directory reported by `dirs::cache_dir()` joined with `vyctor`, or
/// `.vyctor/cache` when no cache directory is reported. The path is escaped
/// before being placed inside the double-quoted TOML string, so paths that
/// contain backslashes (e.g. `C:\Users\...` on Windows) or quotes still yield
/// valid TOML that round-trips to the original path.
fn generate_default_config_template() -> String {
    let cache_dir = dirs::cache_dir()
        .map(|p| p.join("vyctor").to_string_lossy().into_owned())
        .unwrap_or_else(|| ".vyctor/cache".to_string());
    // In a TOML basic string `\` starts an escape sequence; a raw Windows path
    // would either fail to parse or be silently altered.
    let cache_dir = escape_toml_basic_string(&cache_dir);

    format!(
        r#"# Vyctor Configuration
# For full documentation, see: https://github.com/antonmagnus/vyctor

# =============================================================================
# INDEXING - What files to index and how to chunk them
# =============================================================================
[indexing]
include = [
    "**/*.rs",
    "**/*.ts",
    "**/*.tsx",
    "**/*.js",
    "**/*.jsx",
    "**/*.py",
    "**/*.go",
    "**/*.java",
    "**/*.c",
    "**/*.cpp",
    "**/*.h",
    "**/*.hpp",
    "**/*.md",
    "**/*.txt",
    "**/*.json",
    "**/*.yaml",
    "**/*.yml",
    "**/*.toml",
]

exclude = [
    "**/node_modules/**",
    "**/vendor/**",
    "**/.venv/**",
    "**/venv/**",
    "**/target/**",
    "**/dist/**",
    "**/build/**",
    "**/.next/**",
    "**/.open-next/**",
    "**/.nuxt/**",
    "**/.cache/**",
    "**/.git/**",
    "**/.idea/**",
    "**/.vscode/**",
    "**/package-lock.json",
    "**/yarn.lock",
    "**/pnpm-lock.yaml",
    "**/bun.lockb",
    "**/Cargo.lock",
    "**/.env",
    "**/.env.*",
    "**/.vyctor/**",
]

chunk_size = 1000
chunk_overlap = 200

# AST-aware semantic chunking (uses tree-sitter to split at function/class boundaries)
# Falls back to regex patterns, then character-based chunking if unavailable
semantic_chunking = true

# Maximum chunk size before forced splitting (for large functions/classes)
# The signature will be preserved as context in each sub-chunk
max_chunk_size = 3000

# =============================================================================
# EMBEDDING - Choose your embedding provider
# =============================================================================
[embedding]
# Provider options: "local", "openai", "voyage"
provider = "local"

# Embedding dimensions (must match your model)
# - local (all-MiniLM-L6-v2): 384
# - openai (text-embedding-3-small): 1536
# - openai (text-embedding-3-large): 3072
# - voyage (voyage-3-lite): 512
# - voyage (voyage-4-lite): 1024
dimensions = 384

batch_size = 100

[embedding.local]
model = "sentence-transformers/all-MiniLM-L6-v2"
cache_dir = "{cache_dir}"

[embedding.openai]
model = "text-embedding-3-small"
api_key_env = "OPENAI_API_KEY"
base_url = "https://api.openai.com/v1"

[embedding.voyage]
model = "voyage-3-lite"
api_key_env = "VOYAGE_API_KEY"
base_url = "https://api.voyageai.com/v1"

# =============================================================================
# RERANKER - Optional second-stage ranking (disabled by default)
# =============================================================================
[reranker]
provider = "none" # or "voyage"
top_k = 30

[reranker.voyage]
api_key_env = "VOYAGE_API_KEY"
base_url = "https://api.voyageai.com/v1"
model = "rerank-2.5-lite"

# =============================================================================
# WATCH - Background file watcher daemon settings
# =============================================================================
[watch]
auto_start = true
debounce_ms = 300
"#,
        cache_dir = cache_dir
    )
}

/// Escape `raw` so it can be embedded between the double quotes of a TOML
/// basic string: backslashes, double quotes and control characters are
/// replaced by their TOML escape sequences; everything else is copied as-is.
fn escape_toml_basic_string(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // Remaining control characters use the generic \uXXXX form.
            c if c.is_control() => escaped.push_str(&format!("\\u{:04X}", c as u32)),
            c => escaped.push(c),
        }
    }
    escaped
}

/// Add .vyctor/ to .gitignore if it exists
fn update_gitignore(root: &Path) -> Result<()> {
    let gitignore_path = root.join(".gitignore");
    let vyctor_ignore = "\n# Vyctor semantic search index\n.vyctor/\n";

    if gitignore_path.exists() {
        let contents = std::fs::read_to_string(&gitignore_path)?;
        if !contents.contains(".vyctor") {
            let mut file = std::fs::OpenOptions::new()
                .append(true)
                .open(&gitignore_path)?;
            use std::io::Write;
            file.write_all(vyctor_ignore.as_bytes())?;
        }
    } else {
        std::fs::write(&gitignore_path, vyctor_ignore.trim_start())?;
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Initialising an empty directory creates `.vyctor/` and places the
    /// config file in the project root (not inside `.vyctor/`).
    #[test]
    fn test_init_creates_directory_and_config() {
        let project = tempdir().unwrap();
        let root = project.path();

        let config = init_vyctor_dir(root, false).unwrap();

        let data_dir = root.join(VYCTOR_DIR);
        let config_file = root.join(CONFIG_FILE);
        assert!(data_dir.exists());
        assert!(config_file.exists());
        assert!(!config.indexing.include.is_empty());
    }

    /// Initialising an empty directory creates a `.gitignore` that ignores
    /// `.vyctor/` (which contains the database).
    #[test]
    fn test_init_creates_gitignore() {
        let project = tempdir().unwrap();
        let root = project.path();
        init_vyctor_dir(root, false).unwrap();

        let gitignore_path = root.join(".gitignore");
        assert!(gitignore_path.exists());

        let ignore_rules = std::fs::read_to_string(&gitignore_path).unwrap();
        assert!(ignore_rules.contains(".vyctor/"));
    }
}