talon-cli 0.4.2

Talon CLI: hybrid retrieval over Obsidian vaults and markdown corpora, with grounded answers, MCP server, and agent-native output.
Documentation
//! Config loading and initialization for the CLI process boundary.

use eyre::{Result, WrapErr as _, bail};
use fs_err as fs;
use std::path::{Component, Path, PathBuf};
use talon_core::{
    ChatAdapter, ChatAskConfig, ChatExpansionConfig, ChatSection, ContainerPath, CredentialsConfig,
    EmbeddingAdapter, EmbeddingConfig, EndpointAuthConfig, RerankAdapter, RerankConfig,
    TalonConfig,
};

/// File name of the Talon config file.
///
/// [`default_config_path`] appends this to the config directory to form the
/// full default path.
pub const CONFIG_FILE_NAME: &str = "config.toml";

/// Name of the directory that holds the Talon config file.
///
/// [`default_config_path`] places this directory under `$XDG_CONFIG_HOME`
/// when that variable is set and non-empty, otherwise under `~/.config`.
pub const CONFIG_DIR_NAME: &str = "talon";

/// Resolves the default config file location.
///
/// The result is `$XDG_CONFIG_HOME/talon/config.toml` when `XDG_CONFIG_HOME`
/// is set to a non-empty value, and `~/.config/talon/config.toml` otherwise.
/// If the home directory cannot be determined, `.` stands in for it.
#[must_use]
pub fn default_config_path() -> PathBuf {
    // Only evaluated when XDG_CONFIG_HOME is unset or empty.
    let home_config_root = || {
        let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
        home.join(".config")
    };
    let config_root =
        non_empty_env_os("XDG_CONFIG_HOME").map_or_else(home_config_root, PathBuf::from);

    config_root.join(CONFIG_DIR_NAME).join(CONFIG_FILE_NAME)
}

/// Default `SQLite` index path, i.e. the index path of the `default` workspace.
///
/// Delegates to [`default_db_path_for_workspace`].
#[must_use]
pub fn default_db_path() -> PathBuf {
    let workspace = "default";
    default_db_path_for_workspace(workspace)
}

/// Default `SQLite` index path for a workspace: `~/.talon/{workspace}.db`.
///
/// The workspace name is passed through `sanitize_workspace_name` before it
/// becomes the file stem. If the home directory cannot be determined, `.`
/// stands in for it.
#[must_use]
pub fn default_db_path_for_workspace(workspace: &str) -> PathBuf {
    let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
    let file_name = format!("{}.db", sanitize_workspace_name(workspace));
    home.join(".talon").join(file_name)
}

/// Config template written by `talon init`.
///
/// [`init_config`] writes this text verbatim to the default config path when
/// no config file exists there yet. `vault_path` is a placeholder the user is
/// expected to edit, and the scopes section ships commented out.
pub const CONFIG_TEMPLATE: &str = r#"# Talon configuration.
# Location: ~/.config/talon/config.toml

vault_path = "/Users/you/path/to/obsidian"
# Convention: ~/.talon/{workspace}.db. Update this if you rename the vault.
db_path = "~/.talon/obsidian.db"
include_patterns = ["**/*.md"]
ignore_patterns = [".obsidian/**", ".git/**", "templates/**", "*.canvas"]

[indexer]
chunk_tokens = 512
chunk_overlap = 64
chunk_min_tokens = 16

[search]
candidate_limit = 60
limit = 10
cache_size = 200
rerank_cache_size = 2000
rerank_batch_size = 4
rerank_max_tokens = 128

[embedding]
base_url = "http://localhost:8000"
adapter = "tei"
model = "embed"
document_model = "embed_chunked"
context_tokens = 512

[rerank]
base_url = "http://localhost:8000"
adapter = "minimal"
model = "rerank"
score_scale = "normalized"
truncate = true

[chat.expansion]
base_url = "http://localhost:8000/v1"
model = "bonsai"
context_tokens = 16000
max_output_tokens = 768

[chat.ask]
model = "qwen-smol"
context_tokens = 65536
max_output_tokens = 4096
planning_reasoning_effort = "none"
synthesis_reasoning_effort = "none"

[mcp.hooks]
recall_deadline_ms = 20000

# ── Scopes ─────────────────────────────────────────────────────────────────
# Named vault partitions with priority-based ranking.
# See docs/CONFIG.md for full reference.
# Uncomment and edit the Karpathy preset below.
#
# [scopes.wiki]
# glob     = ["wiki/**", "concepts/**"]
# priority = "boosted"
# default  = true
#
# ... additional scopes ...
"#;

/// Reads, parses and validates the config file at `path`.
///
/// After parsing, `vault_path` and `db_path` are resolved via
/// `resolve_config_paths`, and the chunker settings are validated.
///
/// # Errors
///
/// Returns an error if the file cannot be read, is not valid TOML for
/// [`TalonConfig`], the current directory is unavailable while resolving
/// paths, or chunker validation fails.
pub fn load_config_file(path: &Path) -> Result<TalonConfig> {
    let shown = path.display();

    let raw = fs::read_to_string(path)
        .wrap_err_with(|| format!("failed to read config file: {shown}"))?;
    let mut parsed = toml::from_str::<TalonConfig>(&raw)
        .wrap_err_with(|| format!("failed to parse config file: {shown}"))?;

    resolve_config_paths(&mut parsed, path)?;

    // Surface the validator's own message as the error.
    if let Err(message) = parsed.chunker.validate() {
        bail!("{message}");
    }
    Ok(parsed)
}

/// Loads the config, choosing the file in this order: `explicit_path`, the
/// `TALON_CONFIG_FILE` environment variable, then [`default_config_path`].
///
/// The chosen path is recorded in `config_file_path`. When `TALON_VAULT` is
/// set to a non-blank value it replaces `vault_path`, resolved against the
/// current working directory.
///
/// # Errors
///
/// Returns an error if the config file does not exist, cannot be loaded, or
/// the current directory is unavailable while applying `TALON_VAULT`.
pub fn load_config(explicit_path: Option<&Path>) -> Result<TalonConfig> {
    let path = match explicit_path {
        Some(explicit) => explicit.to_path_buf(),
        None => non_empty_env_path("TALON_CONFIG_FILE").unwrap_or_else(default_config_path),
    };

    if !path.exists() {
        bail!(
            "config not found at {}, run `talon init` first",
            path.display()
        );
    }

    let mut config = load_config_file(&path)?;
    config.config_file_path = Some(path);

    // TALON_VAULT lets callers (e.g. the Hermes plugin) point at a specific
    // vault without editing the config file.
    if let Some(vault_override) = non_empty_env_path("TALON_VAULT") {
        let cwd = std::env::current_dir()?;
        config.vault_path = absolutize_path(vault_override, &cwd);
    }

    Ok(config)
}

/// Reads environment variable `key`, treating an empty value as unset.
///
/// The value is returned unmodified; whitespace-only values count as set.
fn non_empty_env_os(key: &str) -> Option<std::ffi::OsString> {
    match std::env::var_os(key) {
        Some(value) if !value.is_empty() => Some(value),
        _ => None,
    }
}

/// Reads a filesystem path from environment variable `key`.
///
/// Returns `None` when the variable is unset, empty, or whitespace-only.
/// Otherwise the value is returned as-is (not trimmed).
///
/// The value is read with [`std::env::var_os`] rather than [`std::env::var`]
/// so that a path which is not valid Unicode is preserved instead of being
/// silently treated as unset.
fn non_empty_env_path(key: &str) -> Option<PathBuf> {
    std::env::var_os(key)
        // The blank check runs on a lossy view; the original bytes are what
        // end up in the returned path.
        .filter(|value| !value.to_string_lossy().trim().is_empty())
        .map(PathBuf::from)
}

/// Writes [`CONFIG_TEMPLATE`] to the default config path if nothing exists there.
///
/// Missing parent directories are created first. Returns `Ok(true)` when the
/// file was written and `Ok(false)` when a file was already present (it is
/// left untouched).
///
/// # Errors
///
/// Returns an error if the config directory cannot be created or the file cannot be written.
pub fn init_config() -> Result<bool> {
    let target = default_config_path();
    if target.exists() {
        // Never clobber an existing config.
        return Ok(false);
    }

    if let Some(dir) = target.parent() {
        let dir_shown = dir.display();
        fs::create_dir_all(dir)
            .wrap_err_with(|| format!("failed to create config directory: {dir_shown}"))?;
    }

    let target_shown = target.display();
    fs::write(&target, CONFIG_TEMPLATE)
        .wrap_err_with(|| format!("failed to write config file: {target_shown}"))?;

    Ok(true)
}

/// Builds an in-memory default config for the vault at `vault_path`.
///
/// A relative (or `~`-prefixed) vault path is made absolute against the
/// current working directory, falling back to `.` when that is unavailable.
/// The database path is derived from the vault's final path component via
/// [`default_db_path_for_workspace`]. `config_file_path` is left as `None`.
#[must_use]
pub fn default_config_for_vault(vault_path: PathBuf) -> TalonConfig {
    // All default endpoints point at the same local server.
    let local_endpoint = "http://localhost:8000";

    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let vault_path = absolutize_path(vault_path, &cwd);
    let workspace = workspace_name_for_vault(&vault_path);
    let db_path = default_db_path_for_workspace(&workspace);

    let ignore_patterns: Vec<String> = [".obsidian/**", ".git/**", "templates/**", "*.canvas"]
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect();

    let embedding = EmbeddingConfig {
        base_url: String::from(local_endpoint),
        auth: EndpointAuthConfig::default(),
        adapter: EmbeddingAdapter::Tei,
        model: String::from("embed"),
        document_model: Some(String::from("embed_chunked")),
        context_tokens: 512,
    };

    let rerank = RerankConfig {
        base_url: String::from(local_endpoint),
        auth: EndpointAuthConfig::default(),
        adapter: RerankAdapter::Minimal,
        model: String::from("rerank"),
        score_scale: talon_core::RerankScoreScale::default(),
        truncate: true,
    };

    let expansion = ChatExpansionConfig {
        base_url: format!("{local_endpoint}/v1"),
        auth: EndpointAuthConfig::default(),
        adapter: ChatAdapter::default(),
        model: String::from("bonsai"),
        context_tokens: 16_000,
        max_output_tokens: Some(768),
    };

    TalonConfig {
        vault_path,
        db_path,
        config_file_path: None,
        include_patterns: vec![String::from("**/*.md")],
        ignore_patterns,
        credentials: CredentialsConfig::default(),
        embedding,
        rerank,
        chat: ChatSection {
            expansion,
            ask: ChatAskConfig::default(),
        },
        mcp: talon_core::McpConfig::default(),
        scopes: default_karpathy_scopes(),
        search: talon_core::SearchConfig::default(),
        inspect: talon_core::InspectConfig::default(),
        chunker: talon_core::ChunkerConfig::default(),
    }
}

/// Derives a workspace name from the final component of `vault_path`.
///
/// Falls back to `"default"` when the path has no final component, the
/// component is not valid UTF-8, or it is blank.
fn workspace_name_for_vault(vault_path: &Path) -> String {
    match vault_path.file_name().and_then(|leaf| leaf.to_str()) {
        Some(leaf) if !leaf.trim().is_empty() => leaf.to_owned(),
        _ => String::from("default"),
    }
}

/// Turns an arbitrary workspace name into a file-stem-safe slug.
///
/// ASCII letters and digits are lowercased, `-` and `_` are kept, and every
/// other character becomes `-`. Leading and trailing `-` are then stripped;
/// if nothing remains the result is `"default"`.
fn sanitize_workspace_name(value: &str) -> String {
    let slug: String = value
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
                c.to_ascii_lowercase()
            } else {
                '-'
            }
        })
        .collect();

    match slug.trim_matches('-') {
        "" => String::from("default"),
        core => core.to_owned(),
    }
}

/// Rewrites `vault_path` and `db_path` in `config` as absolute paths.
///
/// Relative entries are anchored at the directory containing the config file
/// (itself made absolute against the current directory); the current
/// directory is used when the config path has no parent.
///
/// # Errors
///
/// Returns an error if the current working directory cannot be determined.
fn resolve_config_paths(config: &mut TalonConfig, config_path: &Path) -> Result<()> {
    let cwd = std::env::current_dir()?;
    let absolute_config = absolutize_path(config_path.to_path_buf(), &cwd);
    let anchor = absolute_config.parent().unwrap_or(&cwd);

    // Move each path out, resolve it, and store the result back.
    let vault = std::mem::take(&mut config.vault_path);
    config.vault_path = absolutize_path(vault, anchor);

    let db = std::mem::take(&mut config.db_path);
    config.db_path = absolutize_path(db, anchor);

    Ok(())
}

/// Expands a leading `~` in `path` and, if the result is still relative,
/// joins it onto `base`.
fn absolutize_path(path: PathBuf, base: &Path) -> PathBuf {
    let expanded = expand_tilde(path);
    if expanded.is_relative() {
        return base.join(expanded);
    }
    expanded
}

/// Replaces a leading `~` component of `path` with the user's home directory.
///
/// The path is returned unchanged when its first component is not exactly
/// `~`, or when the home directory cannot be determined.
fn expand_tilde(path: PathBuf) -> PathBuf {
    let Some(home) = dirs::home_dir() else {
        return path;
    };

    let mut parts = path.components();
    let starts_with_tilde = matches!(parts.next(), Some(Component::Normal(first)) if first == "~");

    if starts_with_tilde {
        // `parts` now holds everything after the `~`.
        home.join(parts.as_path())
    } else {
        path
    }
}

mod karpathy;
mod refresh;
use karpathy::default_karpathy_scopes;
pub use refresh::{
    RefreshLockPolicy, refresh_index_if_needed, refresh_index_with_lock, sync_lock_path,
};

/// Converts the configured vault path to a [`ContainerPath`].
///
/// Returns `None` when `config` is absent (e.g. when running without a config
/// file) or when [`ContainerPath::parse`] rejects the path. The path is
/// converted to text lossily before parsing.
#[must_use]
pub fn vault_container_path(config: Option<&TalonConfig>) -> Option<ContainerPath> {
    let vault = config?.vault_path.to_string_lossy();
    ContainerPath::parse(vault.as_ref()).ok()
}

#[cfg(test)]
mod tests;