mod schema;
#[allow(unused_imports)]
pub use schema::{
EmbeddingConfig, EmbeddingProvider, IndexingConfig, RerankerConfig, RerankerProviderType,
VyctorConfig, WatchConfig,
};
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
/// Name of the per-project directory created under the project root; the
/// index database file lives inside it.
pub const VYCTOR_DIR: &str = ".vyctor";
/// File name of the project configuration, stored directly in the project root.
pub const CONFIG_FILE: &str = "vyctor.config.toml";
/// File name of the index database inside [`VYCTOR_DIR`]
/// (presumably a DuckDB database, judging by the extension).
pub const DB_FILE: &str = "index.duckdb";
pub fn find_vyctor_root() -> Result<PathBuf> {
let current = std::env::current_dir()?;
find_vyctor_root_from(¤t)
}
/// Walks upward from `start` looking for a vyctor project root.
///
/// A directory counts as the root when it contains either a regular file
/// named [`CONFIG_FILE`] or a directory named [`VYCTOR_DIR`]. `start` itself
/// is checked first, then each parent in turn.
///
/// # Errors
///
/// Returns an error when neither marker is found in `start` or any ancestor.
pub fn find_vyctor_root_from(start: &Path) -> Result<PathBuf> {
    // `ancestors()` yields `start`, then its parent, and so on until the path
    // has no parent left.
    for candidate in start.ancestors() {
        // `is_file` / `is_dir` are already false for missing paths, so no
        // separate existence check is needed.
        if candidate.join(CONFIG_FILE).is_file() || candidate.join(VYCTOR_DIR).is_dir() {
            return Ok(candidate.to_path_buf());
        }
    }
    anyhow::bail!("No vyctor.config.toml found. Run 'vyctor init' to initialize.")
}
/// Returns the path of the [`VYCTOR_DIR`] directory under the project root
/// found by [`find_vyctor_root`].
///
/// # Errors
///
/// Propagates the error from [`find_vyctor_root`] when no root is found.
pub fn vyctor_dir() -> Result<PathBuf> {
    let root = find_vyctor_root()?;
    Ok(root.join(VYCTOR_DIR))
}
/// Returns the path of the [`CONFIG_FILE`] in the project root found by
/// [`find_vyctor_root`].
///
/// # Errors
///
/// Propagates the error from [`find_vyctor_root`] when no root is found.
pub fn config_path() -> Result<PathBuf> {
    let root = find_vyctor_root()?;
    Ok(root.join(CONFIG_FILE))
}
/// Returns the path of the [`DB_FILE`] inside the directory reported by
/// [`vyctor_dir`].
///
/// # Errors
///
/// Propagates the error from [`vyctor_dir`] when no project root is found.
pub fn db_path() -> Result<PathBuf> {
    let data_dir = vyctor_dir()?;
    Ok(data_dir.join(DB_FILE))
}
/// Reads the project's config file, parses it as TOML into a
/// [`VyctorConfig`], and runs the config's `validate` check.
///
/// # Errors
///
/// Returns an error when the project root cannot be found, the file cannot
/// be read, the TOML does not parse, or `validate` rejects the config.
pub fn load_config() -> Result<VyctorConfig> {
    let config_file = config_path()?;

    let raw = std::fs::read_to_string(&config_file)
        .with_context(|| format!("Failed to read config file: {}", config_file.display()))?;

    let parsed = toml::from_str::<VyctorConfig>(&raw)
        .with_context(|| format!("Failed to parse config file: {}", config_file.display()))?;

    parsed.validate()?;
    Ok(parsed)
}
/// Serializes `config` as pretty-printed TOML and writes it to the project's
/// config file, replacing the file's previous contents.
///
/// # Errors
///
/// Returns an error when the project root cannot be found, serialization
/// fails, or the file cannot be written.
#[allow(dead_code)]
pub fn save_config(config: &VyctorConfig) -> Result<()> {
    let target = config_path()?;
    let serialized = toml::to_string_pretty(config).context("Failed to serialize config")?;

    std::fs::write(&target, serialized)
        .with_context(|| format!("Failed to write config file: {}", target.display()))
}
/// Initializes a vyctor project in `root`.
///
/// Creates the [`VYCTOR_DIR`] directory, writes the default config template
/// to [`CONFIG_FILE`], makes sure `.gitignore` ignores the vyctor directory,
/// and returns the parsed default configuration.
///
/// # Arguments
///
/// * `root` - directory to initialize.
/// * `force` - when `true`, an existing config file is overwritten instead
///   of being treated as an error.
///
/// # Errors
///
/// Returns an error when the config file already exists and `force` is
/// `false`, when the generated template does not parse, or when any
/// filesystem operation fails.
pub fn init_vyctor_dir(root: &Path, force: bool) -> Result<VyctorConfig> {
    let vyctor_dir = root.join(VYCTOR_DIR);
    let config_path = root.join(CONFIG_FILE);
    if config_path.exists() && !force {
        anyhow::bail!("vyctor.config.toml already exists. Use --force to reinitialize.");
    }

    // Render the template once and parse it BEFORE touching the filesystem:
    // the returned config is then guaranteed to match what is written, and a
    // template that fails to parse never leaves a broken config file behind.
    let contents = generate_default_config_template();
    let config: VyctorConfig =
        toml::from_str(&contents).context("Failed to parse generated config")?;

    std::fs::create_dir_all(&vyctor_dir).with_context(|| {
        format!(
            "Failed to create .vyctor directory: {}",
            vyctor_dir.display()
        )
    })?;
    std::fs::write(&config_path, &contents)
        .with_context(|| format!("Failed to write config file: {}", config_path.display()))?;
    update_gitignore(root)?;

    Ok(config)
}
/// Escapes `raw` so it can be embedded between double quotes as a TOML
/// basic string.
///
/// Backslashes, double quotes, and control characters other than tab are
/// escaped; everything else is copied through unchanged.
fn escape_toml_basic_string(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            // Tab is the only control character TOML permits unescaped.
            c if c.is_control() && c != '\t' => {
                escaped.push_str(&format!("\\u{:04X}", c as u32));
            }
            c => escaped.push(c),
        }
    }
    escaped
}

/// Renders the default `vyctor.config.toml` contents as commented TOML text.
///
/// The only dynamic value is `[embedding.local].cache_dir`: it is set to a
/// `vyctor` subdirectory of the platform cache directory reported by
/// `dirs::cache_dir()`, or to `.vyctor/cache` when that is unavailable.
fn generate_default_config_template() -> String {
    let cache_dir = dirs::cache_dir()
        .map(|p| p.join("vyctor").to_string_lossy().into_owned())
        .unwrap_or_else(|| ".vyctor/cache".to_string());
    // The path is spliced into a double-quoted TOML string. Windows paths
    // contain backslashes, which TOML would read as escape sequences and
    // reject, so the value must be escaped first.
    let cache_dir = escape_toml_basic_string(&cache_dir);
    format!(
        r#"# Vyctor Configuration
# For full documentation, see: https://github.com/antonmagnus/vyctor
# =============================================================================
# INDEXING - What files to index and how to chunk them
# =============================================================================
[indexing]
include = [
"**/*.rs",
"**/*.ts",
"**/*.tsx",
"**/*.js",
"**/*.jsx",
"**/*.py",
"**/*.go",
"**/*.java",
"**/*.c",
"**/*.cpp",
"**/*.h",
"**/*.hpp",
"**/*.md",
"**/*.txt",
"**/*.json",
"**/*.yaml",
"**/*.yml",
"**/*.toml",
]
exclude = [
"**/node_modules/**",
"**/vendor/**",
"**/.venv/**",
"**/venv/**",
"**/target/**",
"**/dist/**",
"**/build/**",
"**/.next/**",
"**/.open-next/**",
"**/.nuxt/**",
"**/.cache/**",
"**/.git/**",
"**/.idea/**",
"**/.vscode/**",
"**/package-lock.json",
"**/yarn.lock",
"**/pnpm-lock.yaml",
"**/bun.lockb",
"**/Cargo.lock",
"**/.env",
"**/.env.*",
"**/.vyctor/**",
]
chunk_size = 1000
chunk_overlap = 200
# AST-aware semantic chunking (uses tree-sitter to split at function/class boundaries)
# Falls back to regex patterns, then character-based chunking if unavailable
semantic_chunking = true
# Maximum chunk size before forced splitting (for large functions/classes)
# The signature will be preserved as context in each sub-chunk
max_chunk_size = 3000
# =============================================================================
# EMBEDDING - Choose your embedding provider
# =============================================================================
[embedding]
# Provider options: "local", "openai", "voyage"
provider = "local"
# Embedding dimensions (must match your model)
# - local (all-MiniLM-L6-v2): 384
# - openai (text-embedding-3-small): 1536
# - openai (text-embedding-3-large): 3072
# - voyage (voyage-4-lite): 1024
dimensions = 384
batch_size = 100
[embedding.local]
model = "sentence-transformers/all-MiniLM-L6-v2"
cache_dir = "{cache_dir}"
[embedding.openai]
model = "text-embedding-3-small"
api_key_env = "OPENAI_API_KEY"
base_url = "https://api.openai.com/v1"
[embedding.voyage]
model = "voyage-3-lite"
api_key_env = "VOYAGE_API_KEY"
base_url = "https://api.voyageai.com/v1"
# =============================================================================
# RERANKER - Optional second-stage ranking (disabled by default)
# =============================================================================
[reranker]
provider = "none" # or "voyage"
top_k = 30
[reranker.voyage]
api_key_env = "VOYAGE_API_KEY"
base_url = "https://api.voyageai.com/v1"
model = "rerank-2.5-lite"
# =============================================================================
# WATCH - Background file watcher daemon settings
# =============================================================================
[watch]
auto_start = true
debounce_ms = 300
"#,
        cache_dir = cache_dir
    )
}
fn update_gitignore(root: &Path) -> Result<()> {
let gitignore_path = root.join(".gitignore");
let vyctor_ignore = "\n# Vyctor semantic search index\n.vyctor/\n";
if gitignore_path.exists() {
let contents = std::fs::read_to_string(&gitignore_path)?;
if !contents.contains(".vyctor") {
let mut file = std::fs::OpenOptions::new()
.append(true)
.open(&gitignore_path)?;
use std::io::Write;
file.write_all(vyctor_ignore.as_bytes())?;
}
} else {
std::fs::write(&gitignore_path, vyctor_ignore.trim_start())?;
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Initializing an empty directory creates the vyctor directory and the
    /// config file, and returns a config with a non-empty include list.
    #[test]
    fn test_init_creates_directory_and_config() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();

        let config = init_vyctor_dir(root, false).unwrap();

        assert!(root.join(VYCTOR_DIR).exists());
        assert!(root.join(CONFIG_FILE).exists());
        assert!(!config.indexing.include.is_empty());
    }

    /// Initializing a directory without a `.gitignore` creates one that
    /// ignores the vyctor directory.
    #[test]
    fn test_init_creates_gitignore() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();

        init_vyctor_dir(root, false).unwrap();

        let gitignore_file = root.join(".gitignore");
        assert!(gitignore_file.exists());

        let ignore_rules = std::fs::read_to_string(gitignore_file).unwrap();
        assert!(ignore_rules.contains(".vyctor/"));
    }
}