use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
/// Root configuration object grouping the indexing, embedding, reranker,
/// and watch sections.
///
/// `indexing` and `embedding` carry no serde default, so both must be present
/// in the deserialized input. `reranker` and `watch` are optional and fall
/// back to their `Default` implementations when absent.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VyctorConfig {
/// File selection and chunking settings (required section).
pub indexing: IndexingConfig,
/// Embedding provider selection and per-provider settings (required section).
pub embedding: EmbeddingConfig,
/// Reranker settings; defaults to `RerankerConfig::default()` when omitted.
#[serde(default)]
pub reranker: RerankerConfig,
/// Watch settings; defaults to `WatchConfig::default()` when omitted.
#[serde(default)]
pub watch: WatchConfig,
}
impl VyctorConfig {
    /// Validates every section in order: indexing, embedding, reranker, watch.
    ///
    /// Later sections are only checked when all earlier ones passed.
    ///
    /// # Errors
    ///
    /// Returns the first failing section's error, wrapped in a context message
    /// that names the section (for example `"Invalid indexing config"`).
    pub fn validate(&self) -> Result<()> {
        self.indexing
            .validate()
            .context("Invalid indexing config")
            .and_then(|()| {
                self.embedding
                    .validate()
                    .context("Invalid embedding config")
            })
            .and_then(|()| self.reranker.validate().context("Invalid reranker config"))
            .and_then(|()| self.watch.validate().context("Invalid watch config"))
    }
}
/// Settings that control which files are indexed and how they are chunked.
///
/// `include`, `exclude`, `chunk_size`, and `chunk_overlap` have no serde
/// default and must be supplied; the remaining fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingConfig {
/// Patterns selecting files to index; validation requires at least one.
/// The built-in defaults use `**/*.ext`-style patterns.
pub include: Vec<String>,
/// Patterns for paths to skip; may be empty.
pub exclude: Vec<String>,
/// Target chunk size; must be greater than 0.
/// NOTE(review): the unit (characters, bytes, tokens) is not visible here — confirm.
pub chunk_size: usize,
/// Overlap between chunks; must be strictly less than `chunk_size`.
pub chunk_overlap: usize,
/// Toggle for semantic chunking; `true` when omitted.
#[serde(default = "default_semantic_chunking")]
pub semantic_chunking: bool,
/// Upper bound on chunk size; must be at least `chunk_size`. 3000 when omitted.
#[serde(default = "default_max_chunk_size")]
pub max_chunk_size: usize,
}
/// Serde default for `IndexingConfig::semantic_chunking`: enabled.
fn default_semantic_chunking() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `IndexingConfig::max_chunk_size`.
fn default_max_chunk_size() -> usize {
    const MAX_CHUNK_SIZE: usize = 3_000;
    MAX_CHUNK_SIZE
}
impl Default for IndexingConfig {
fn default() -> Self {
Self {
include: vec![
"**/*.rs".to_string(),
"**/*.ts".to_string(),
"**/*.tsx".to_string(),
"**/*.js".to_string(),
"**/*.jsx".to_string(),
"**/*.py".to_string(),
"**/*.go".to_string(),
"**/*.java".to_string(),
"**/*.c".to_string(),
"**/*.cpp".to_string(),
"**/*.h".to_string(),
"**/*.hpp".to_string(),
"**/*.md".to_string(),
"**/*.txt".to_string(),
"**/*.json".to_string(),
"**/*.yaml".to_string(),
"**/*.yml".to_string(),
"**/*.toml".to_string(),
],
exclude: vec![
"**/node_modules/**".to_string(),
"**/vendor/**".to_string(),
"**/.venv/**".to_string(),
"**/venv/**".to_string(),
"**/env/**".to_string(),
"**/.bundle/**".to_string(),
"**/target/**".to_string(),
"**/dist/**".to_string(),
"**/build/**".to_string(),
"**/out/**".to_string(),
"**/bin/**".to_string(),
"**/obj/**".to_string(),
"**/.output/**".to_string(),
"**/coverage/**".to_string(),
"**/.next/**".to_string(),
"**/.open-next/**".to_string(),
"**/.nuxt/**".to_string(),
"**/.svelte-kit/**".to_string(),
"**/.astro/**".to_string(),
"**/.turbo/**".to_string(),
"**/.cache/**".to_string(),
"**/.parcel-cache/**".to_string(),
"**/.wrangler/**".to_string(),
"**/.vercel/**".to_string(),
"**/.netlify/**".to_string(),
"**/.serverless/**".to_string(),
"**/.terraform/**".to_string(),
"**/.git/**".to_string(),
"**/.svn/**".to_string(),
"**/.hg/**".to_string(),
"**/.idea/**".to_string(),
"**/.vscode/**".to_string(),
"**/.vs/**".to_string(),
"**/*.swp".to_string(),
"**/*.swo".to_string(),
"**/.DS_Store".to_string(),
"**/Thumbs.db".to_string(),
"**/.gradle/**".to_string(),
"**/.m2/**".to_string(),
"**/__pycache__/**".to_string(),
"**/*.pyc".to_string(),
"**/*.pyo".to_string(),
"**/package-lock.json".to_string(),
"**/yarn.lock".to_string(),
"**/pnpm-lock.yaml".to_string(),
"**/bun.lockb".to_string(),
"**/Cargo.lock".to_string(),
"**/Gemfile.lock".to_string(),
"**/poetry.lock".to_string(),
"**/Pipfile.lock".to_string(),
"**/composer.lock".to_string(),
"**/go.sum".to_string(),
"**/flake.lock".to_string(),
"**/.env".to_string(),
"**/.env.*".to_string(),
"**/*.env".to_string(),
"**/.envrc".to_string(),
"**/secrets/**".to_string(),
"**/*.log".to_string(),
"**/logs/**".to_string(),
"**/tmp/**".to_string(),
"**/temp/**".to_string(),
"**/*.min.js".to_string(),
"**/*.min.css".to_string(),
"**/*.map".to_string(),
"**/*.chunk.js".to_string(),
"**/*.bundle.js".to_string(),
"**/.vyctor/**".to_string(),
],
chunk_size: 1000,
chunk_overlap: 200,
semantic_chunking: default_semantic_chunking(),
max_chunk_size: default_max_chunk_size(),
}
}
}
impl IndexingConfig {
    /// Checks the indexing settings for internal consistency.
    ///
    /// # Errors
    ///
    /// Fails, in this order, when `include` is empty, `chunk_size` is zero,
    /// `chunk_overlap` is not strictly below `chunk_size`, or
    /// `max_chunk_size` is below `chunk_size`.
    pub fn validate(&self) -> Result<()> {
        anyhow::ensure!(
            !self.include.is_empty(),
            "At least one include pattern is required"
        );
        anyhow::ensure!(self.chunk_size != 0, "chunk_size must be greater than 0");
        anyhow::ensure!(
            self.chunk_overlap < self.chunk_size,
            "chunk_overlap must be less than chunk_size"
        );
        anyhow::ensure!(
            self.max_chunk_size >= self.chunk_size,
            "max_chunk_size must be at least chunk_size"
        );
        Ok(())
    }
}
/// Which backend produces embeddings.
///
/// Variants are (de)serialized as lowercase names: `"openai"`, `"voyage"`,
/// and `"local"`. `Local` is the `Default` variant.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum EmbeddingProvider {
/// OpenAI backend; configured through `EmbeddingConfig::openai`.
OpenAI,
/// Voyage backend; configured through `EmbeddingConfig::voyage`.
Voyage,
/// Local model backend; configured through `EmbeddingConfig::local`. Needs no API key.
#[default]
Local,
}
/// Embedding settings: the active provider plus one settings block per provider.
///
/// `provider` and `dimensions` have no serde default and must be supplied;
/// the per-provider blocks and `batch_size` are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingConfig {
/// Selects which of the per-provider blocks below is actually used.
pub provider: EmbeddingProvider,
/// Must be greater than 0.
/// NOTE(review): presumably the embedding vector length — confirm it must match the model.
pub dimensions: usize,
/// OpenAI settings; only validated when `provider` is `OpenAI`.
#[serde(default)]
pub openai: OpenAIConfig,
/// Voyage settings; only validated when `provider` is `Voyage`.
#[serde(default)]
pub voyage: VoyageConfig,
/// Local model settings; only validated when `provider` is `Local`.
#[serde(default)]
pub local: LocalModelConfig,
/// Must be greater than 0; 100 when omitted.
/// NOTE(review): presumably the number of inputs per embedding request — confirm.
#[serde(default = "default_batch_size")]
pub batch_size: usize,
}
/// Serde default for `EmbeddingConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 100;
    BATCH_SIZE
}
impl Default for EmbeddingConfig {
fn default() -> Self {
Self {
provider: EmbeddingProvider::Local,
dimensions: 384,
openai: OpenAIConfig::default(),
voyage: VoyageConfig::default(),
local: LocalModelConfig::default(),
batch_size: 100,
}
}
}
impl EmbeddingConfig {
    /// Checks the embedding settings for the currently selected provider.
    ///
    /// Only the active provider's block is inspected; the other blocks may
    /// hold anything.
    ///
    /// # Errors
    ///
    /// Fails when `dimensions` or `batch_size` is zero, or when the active
    /// provider's `api_key_env` (remote providers only) or `model` is empty.
    pub fn validate(&self) -> Result<()> {
        anyhow::ensure!(self.dimensions != 0, "dimensions must be greater than 0");
        anyhow::ensure!(self.batch_size != 0, "batch_size must be greater than 0");

        match self.provider {
            EmbeddingProvider::Local => {
                anyhow::ensure!(
                    !self.local.model.is_empty(),
                    "local.model is required when using Local provider"
                );
            }
            EmbeddingProvider::OpenAI => {
                let openai = &self.openai;
                anyhow::ensure!(
                    !openai.api_key_env.is_empty(),
                    "openai.api_key_env is required when using OpenAI provider"
                );
                anyhow::ensure!(
                    !openai.model.is_empty(),
                    "openai.model is required when using OpenAI provider"
                );
            }
            EmbeddingProvider::Voyage => {
                let voyage = &self.voyage;
                anyhow::ensure!(
                    !voyage.api_key_env.is_empty(),
                    "voyage.api_key_env is required when using Voyage provider"
                );
                anyhow::ensure!(
                    !voyage.model.is_empty(),
                    "voyage.model is required when using Voyage provider"
                );
            }
        }
        Ok(())
    }

    /// Reads the active provider's API key from the environment.
    ///
    /// The variable name comes from the provider's `api_key_env` setting.
    /// The local provider needs no key and yields an empty string without
    /// consulting the environment.
    ///
    /// # Errors
    ///
    /// Fails when `std::env::var` cannot return the variable's value.
    pub fn get_api_key(&self) -> Result<String> {
        let var_name: &str = match self.provider {
            EmbeddingProvider::Local => return Ok(String::new()),
            EmbeddingProvider::OpenAI => self.openai.api_key_env.as_str(),
            EmbeddingProvider::Voyage => self.voyage.api_key_env.as_str(),
        };
        let lookup = std::env::var(var_name);
        lookup.with_context(|| format!("Environment variable {} not set", var_name))
    }

    /// Returns the model name configured for the active provider.
    pub fn get_model(&self) -> &str {
        let model = match self.provider {
            EmbeddingProvider::Local => &self.local.model,
            EmbeddingProvider::OpenAI => &self.openai.model,
            EmbeddingProvider::Voyage => &self.voyage.model,
        };
        model.as_str()
    }
}
/// Settings for the OpenAI embedding provider. Every field has a serde
/// default, so any subset may be given.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAIConfig {
/// Model name; `"text-embedding-3-small"` when omitted.
#[serde(default = "default_openai_model")]
pub model: String,
/// Name of the environment variable holding the API key (not the key
/// itself); `"OPENAI_API_KEY"` when omitted.
#[serde(default = "default_openai_api_key_env")]
pub api_key_env: String,
/// API base URL; `"https://api.openai.com/v1"` when omitted.
#[serde(default = "default_openai_base_url")]
pub base_url: String,
}
/// Serde default for `OpenAIConfig::model`.
fn default_openai_model() -> String {
    String::from("text-embedding-3-small")
}
/// Serde default for `OpenAIConfig::api_key_env`: the name of the
/// environment variable, not a key value.
fn default_openai_api_key_env() -> String {
    String::from("OPENAI_API_KEY")
}
/// Serde default for `OpenAIConfig::base_url`.
fn default_openai_base_url() -> String {
    String::from("https://api.openai.com/v1")
}
impl Default for OpenAIConfig {
fn default() -> Self {
Self {
model: default_openai_model(),
api_key_env: default_openai_api_key_env(),
base_url: default_openai_base_url(),
}
}
}
/// Settings for the Voyage embedding provider. Every field has a serde
/// default, so any subset may be given.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoyageConfig {
/// Model name; `"voyage-3-lite"` when omitted.
#[serde(default = "default_voyage_model")]
pub model: String,
/// Name of the environment variable holding the API key (not the key
/// itself); `"VOYAGE_API_KEY"` when omitted.
#[serde(default = "default_voyage_api_key_env")]
pub api_key_env: String,
/// API base URL; `"https://api.voyageai.com/v1"` when omitted.
#[serde(default = "default_voyage_base_url")]
pub base_url: String,
}
/// Serde default for `VoyageConfig::model`.
fn default_voyage_model() -> String {
    String::from("voyage-3-lite")
}
/// Serde default for `VoyageConfig::api_key_env`: the name of the
/// environment variable, not a key value.
fn default_voyage_api_key_env() -> String {
    String::from("VOYAGE_API_KEY")
}
/// Serde default for `VoyageConfig::base_url`.
fn default_voyage_base_url() -> String {
    String::from("https://api.voyageai.com/v1")
}
impl Default for VoyageConfig {
fn default() -> Self {
Self {
model: default_voyage_model(),
api_key_env: default_voyage_api_key_env(),
base_url: default_voyage_base_url(),
}
}
}
/// Settings for the local embedding model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalModelConfig {
/// Model identifier. Has no serde default, so it must be given whenever
/// this section appears in the input; must be non-empty when the local
/// provider is active.
pub model: String,
/// Cache directory; when omitted, taken from `default_cache_dir()`.
/// NOTE(review): presumably where model files are stored — confirm against the loader.
#[serde(default = "default_cache_dir")]
pub cache_dir: String,
}
/// Serde default for `LocalModelConfig::cache_dir`.
///
/// Resolves to a `vyctor` directory under `dirs::cache_dir()` when that is
/// available, and to the relative path `.vyctor/cache` otherwise. Non-UTF-8
/// path components are converted lossily.
fn default_cache_dir() -> String {
    match dirs::cache_dir() {
        Some(base) => base.join("vyctor").to_string_lossy().into_owned(),
        None => String::from(".vyctor/cache"),
    }
}
impl Default for LocalModelConfig {
fn default() -> Self {
Self {
model: "sentence-transformers/all-MiniLM-L6-v2".to_string(),
cache_dir: default_cache_dir(),
}
}
}
/// Which backend reranks results, if any.
///
/// Variants are (de)serialized as lowercase names (`"voyage"`, `"none"`).
/// `None` is the `Default` variant.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum RerankerProviderType {
/// Voyage reranker; configured through `RerankerConfig::voyage`.
Voyage,
/// No reranker; this is this enum's own variant, not `Option::None`.
#[default]
None,
}
/// Reranker settings.
///
/// The reranker counts as active only when `enabled` is true and `provider`
/// is not `None`; the remaining fields are validated only in that case.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RerankerConfig {
/// Selected reranker backend. Has no serde default, so it must be given
/// whenever this section appears in the input.
pub provider: RerankerProviderType,
/// Must be greater than 0 when the reranker is active; 30 when omitted.
/// NOTE(review): presumably the number of candidates handed to the reranker — confirm.
#[serde(default = "default_reranker_top_k")]
pub top_k: usize,
/// Master switch; `true` when omitted.
#[serde(default = "default_reranker_enabled")]
pub enabled: bool,
/// Voyage reranker settings; defaults to `VoyageRerankerConfig::default()` when omitted.
#[serde(default)]
pub voyage: VoyageRerankerConfig,
}
/// Serde default for `RerankerConfig::top_k`.
fn default_reranker_top_k() -> usize {
    const TOP_K: usize = 30;
    TOP_K
}
/// Serde default for `RerankerConfig::enabled`: on.
fn default_reranker_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
impl Default for RerankerConfig {
fn default() -> Self {
Self {
provider: RerankerProviderType::None,
top_k: default_reranker_top_k(),
enabled: default_reranker_enabled(),
voyage: VoyageRerankerConfig::default(),
}
}
}
impl RerankerConfig {
    /// Checks the reranker settings.
    ///
    /// An inactive reranker (disabled, or provider `None`) always passes;
    /// its other fields are not inspected.
    ///
    /// # Errors
    ///
    /// For an active reranker, fails when `top_k` is zero or when the Voyage
    /// block has an empty `api_key_env` or `model`.
    pub fn validate(&self) -> Result<()> {
        if !self.is_active() {
            return Ok(());
        }

        anyhow::ensure!(self.top_k != 0, "top_k must be greater than 0");

        match self.provider {
            // Unreachable after the activity guard above; kept so the match
            // stays exhaustive when variants are added.
            RerankerProviderType::None => {}
            RerankerProviderType::Voyage => {
                let voyage = &self.voyage;
                anyhow::ensure!(
                    !voyage.api_key_env.is_empty(),
                    "voyage.api_key_env is required when using Voyage reranker"
                );
                anyhow::ensure!(
                    !voyage.model.is_empty(),
                    "voyage.model is required when using Voyage reranker"
                );
            }
        }
        Ok(())
    }

    /// Reads the Voyage reranker API key from the environment variable named
    /// by `voyage.api_key_env`, regardless of the selected provider.
    ///
    /// # Errors
    ///
    /// Fails when `std::env::var` cannot return the variable's value.
    pub fn get_voyage_api_key(&self) -> Result<String> {
        let var_name = &self.voyage.api_key_env;
        let lookup = std::env::var(var_name);
        lookup.with_context(|| format!("Environment variable {} not set", var_name))
    }

    /// Returns the model name for the selected provider, or an empty string
    /// when the provider is `None`.
    pub fn get_model(&self) -> &str {
        match self.provider {
            RerankerProviderType::None => "",
            RerankerProviderType::Voyage => self.voyage.model.as_str(),
        }
    }

    /// Reports whether reranking should run: enabled and a real provider chosen.
    pub fn is_active(&self) -> bool {
        let has_provider = self.provider != RerankerProviderType::None;
        self.enabled && has_provider
    }
}
/// Settings for the Voyage reranker backend.
///
/// Every field has a serde default, so a partially specified
/// `[reranker.voyage]` section (for example one that only overrides `model`)
/// deserializes instead of failing on a missing key. This matches how the
/// Voyage embedding settings behave.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoyageRerankerConfig {
    /// Name of the environment variable holding the API key (not the key
    /// itself); `"VOYAGE_API_KEY"` when omitted.
    #[serde(default = "default_voyage_reranker_api_key_env")]
    pub api_key_env: String,
    /// API base URL; `"https://api.voyageai.com/v1"` when omitted.
    #[serde(default = "default_voyage_reranker_base_url")]
    pub base_url: String,
    /// Reranker model name; `"rerank-2"` when omitted.
    #[serde(default = "default_voyage_reranker_model")]
    pub model: String,
}

/// Serde default for `VoyageRerankerConfig::api_key_env`; the same value the
/// type's `Default` implementation uses.
fn default_voyage_reranker_api_key_env() -> String {
    "VOYAGE_API_KEY".to_string()
}
/// Serde default for `VoyageRerankerConfig::base_url`.
fn default_voyage_reranker_base_url() -> String {
    String::from("https://api.voyageai.com/v1")
}
/// Serde default for `VoyageRerankerConfig::model`.
fn default_voyage_reranker_model() -> String {
    String::from("rerank-2")
}
impl Default for VoyageRerankerConfig {
fn default() -> Self {
Self {
api_key_env: "VOYAGE_API_KEY".to_string(),
base_url: default_voyage_reranker_base_url(),
model: default_voyage_reranker_model(),
}
}
}
/// Watch settings. Both fields have serde defaults, so any subset may be given.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WatchConfig {
/// `false` when omitted.
/// NOTE(review): presumably starts the watcher automatically — confirm against the caller.
#[serde(default)]
pub auto_start: bool,
/// Debounce interval in milliseconds; must be greater than 0. 300 when omitted.
#[serde(default = "default_debounce_ms")]
pub debounce_ms: u64,
}
/// Serde default for `WatchConfig::debounce_ms`.
fn default_debounce_ms() -> u64 {
    const DEBOUNCE_MS: u64 = 300;
    DEBOUNCE_MS
}
impl Default for WatchConfig {
fn default() -> Self {
Self {
auto_start: false,
debounce_ms: default_debounce_ms(),
}
}
}
impl WatchConfig {
    /// Checks the watch settings.
    ///
    /// # Errors
    ///
    /// Fails when `debounce_ms` is zero.
    pub fn validate(&self) -> Result<()> {
        anyhow::ensure!(self.debounce_ms != 0, "debounce_ms must be greater than 0");
        Ok(())
    }
}
// Unit tests covering configuration defaults, validation rules, serde
// naming, TOML parsing/round-trips, and API-key lookup.
#[cfg(test)]
mod tests {
use super::*;
// The stock configuration must pass its own validation.
#[test]
fn test_default_config_is_valid() {
let config = VyctorConfig::default();
assert!(config.validate().is_ok());
}
// The default configuration survives a TOML serialize/deserialize cycle.
#[test]
fn test_serialize_deserialize() {
let config = VyctorConfig::default();
let serialized = toml::to_string(&config).unwrap();
let deserialized: VyctorConfig = toml::from_str(&serialized).unwrap();
assert_eq!(config.embedding.provider, deserialized.embedding.provider);
}
// --- Indexing validation failures ---
// Overlap larger than the chunk size is rejected.
#[test]
fn test_invalid_chunk_overlap() {
let mut config = VyctorConfig::default();
config.indexing.chunk_overlap = config.indexing.chunk_size + 1;
assert!(config.validate().is_err());
}
// An empty include list is rejected.
#[test]
fn test_invalid_empty_include() {
let mut config = VyctorConfig::default();
config.indexing.include = vec![];
assert!(config.validate().is_err());
}
// A zero chunk size is rejected.
#[test]
fn test_invalid_zero_chunk_size() {
let mut config = VyctorConfig::default();
config.indexing.chunk_size = 0;
assert!(config.validate().is_err());
}
// --- Embedding validation failures ---
// Each provider requires a non-empty model name when it is the active one.
#[test]
fn test_invalid_empty_local_model() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::Local;
config.embedding.local.model = "".to_string();
assert!(config.validate().is_err());
}
#[test]
fn test_invalid_empty_openai_model() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::OpenAI;
config.embedding.openai.model = "".to_string();
assert!(config.validate().is_err());
}
#[test]
fn test_invalid_empty_voyage_model() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::Voyage;
config.embedding.voyage.model = "".to_string();
assert!(config.validate().is_err());
}
// Zero dimensions and zero batch size are rejected.
#[test]
fn test_invalid_zero_dimensions() {
let mut config = VyctorConfig::default();
config.embedding.dimensions = 0;
assert!(config.validate().is_err());
}
#[test]
fn test_invalid_zero_batch_size() {
let mut config = VyctorConfig::default();
config.embedding.batch_size = 0;
assert!(config.validate().is_err());
}
// Remote providers require a non-empty API-key variable name.
#[test]
fn test_invalid_empty_openai_api_key_env() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::OpenAI;
config.embedding.openai.api_key_env = "".to_string();
assert!(config.validate().is_err());
}
#[test]
fn test_invalid_empty_voyage_api_key_env() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::Voyage;
config.embedding.voyage.api_key_env = "".to_string();
assert!(config.validate().is_err());
}
// The local provider validates without any API-key settings being touched.
#[test]
fn test_local_provider_validates_without_api_key() {
let mut config = VyctorConfig::default();
config.embedding.provider = EmbeddingProvider::Local;
assert!(config.validate().is_ok());
}
// --- Provider enum serde naming ---
// Variants serialize to lowercase names.
#[test]
fn test_embedding_provider_serialization() {
let openai = EmbeddingProvider::OpenAI;
let voyage = EmbeddingProvider::Voyage;
let local = EmbeddingProvider::Local;
assert_eq!(serde_json::to_string(&openai).unwrap(), "\"openai\"");
assert_eq!(serde_json::to_string(&voyage).unwrap(), "\"voyage\"");
assert_eq!(serde_json::to_string(&local).unwrap(), "\"local\"");
}
// Lowercase names deserialize back to the matching variants.
#[test]
fn test_embedding_provider_deserialization() {
let openai: EmbeddingProvider = serde_json::from_str("\"openai\"").unwrap();
let voyage: EmbeddingProvider = serde_json::from_str("\"voyage\"").unwrap();
let local: EmbeddingProvider = serde_json::from_str("\"local\"").unwrap();
assert_eq!(openai, EmbeddingProvider::OpenAI);
assert_eq!(voyage, EmbeddingProvider::Voyage);
assert_eq!(local, EmbeddingProvider::Local);
}
#[test]
fn test_default_provider_is_local() {
assert_eq!(EmbeddingProvider::default(), EmbeddingProvider::Local);
}
// --- Default include/exclude lists ---
// Spot-checks only; the full lists are not compared.
#[test]
fn test_default_indexing_includes_common_extensions() {
let config = IndexingConfig::default();
assert!(config.include.contains(&"**/*.rs".to_string()));
assert!(config.include.contains(&"**/*.ts".to_string()));
assert!(config.include.contains(&"**/*.py".to_string()));
assert!(config.include.contains(&"**/*.md".to_string()));
}
#[test]
fn test_default_indexing_excludes_common_dirs() {
let config = IndexingConfig::default();
assert!(config.exclude.contains(&"**/node_modules/**".to_string()));
assert!(config.exclude.contains(&"**/target/**".to_string()));
assert!(config.exclude.contains(&"**/.git/**".to_string()));
}
// Boundary case: overlap exactly equal to the chunk size is also rejected.
#[test]
fn test_chunk_overlap_equals_chunk_size_is_invalid() {
let mut config = VyctorConfig::default();
config.indexing.chunk_overlap = config.indexing.chunk_size;
assert!(config.validate().is_err());
}
// --- TOML parsing ---
// Pretty-printed TOML round-trips the checked fields.
#[test]
fn test_toml_round_trip() {
let config = VyctorConfig::default();
let toml_str = toml::to_string_pretty(&config).unwrap();
let parsed: VyctorConfig = toml::from_str(&toml_str).unwrap();
assert_eq!(config.indexing.chunk_size, parsed.indexing.chunk_size);
assert_eq!(config.embedding.get_model(), parsed.embedding.get_model());
assert_eq!(config.embedding.dimensions, parsed.embedding.dimensions);
}
// Only the required keys plus one provider block are given; everything
// else must come from serde defaults.
#[test]
fn test_parse_minimal_toml() {
let minimal = r#"
[indexing]
include = ["**/*.rs"]
exclude = []
chunk_size = 500
chunk_overlap = 50
[embedding]
provider = "openai"
dimensions = 384
[embedding.openai]
model = "test-model"
api_key_env = "TEST_KEY"
"#;
let config: VyctorConfig = toml::from_str(minimal).unwrap();
assert_eq!(config.indexing.chunk_size, 500);
assert_eq!(config.embedding.openai.model, "test-model");
assert_eq!(config.embedding.get_model(), "test-model");
}
// --- API key lookup ---
// Relies on NONEXISTENT_VAR_12345 not being set in the test environment.
#[test]
fn test_get_api_key_returns_error_when_not_set() {
let config = EmbeddingConfig {
provider: EmbeddingProvider::OpenAI,
openai: OpenAIConfig {
api_key_env: "NONEXISTENT_VAR_12345".to_string(),
..Default::default()
},
..Default::default()
};
assert!(config.get_api_key().is_err());
}
// Sets a dedicated variable, reads it back through the config, then removes it.
// The variable is not removed if the assertion fails.
// NOTE(review): this mutates the process-wide environment while other tests
// may run on parallel threads, and `std::env::set_var`/`remove_var` are
// `unsafe` from the 2024 edition on — confirm the crate edition and whether
// this needs serializing.
#[test]
fn test_get_api_key_returns_value_when_set() {
std::env::set_var("TEST_VYCTOR_API_KEY", "test-value");
let config = EmbeddingConfig {
provider: EmbeddingProvider::OpenAI,
openai: OpenAIConfig {
api_key_env: "TEST_VYCTOR_API_KEY".to_string(),
..Default::default()
},
..Default::default()
};
assert_eq!(config.get_api_key().unwrap(), "test-value");
std::env::remove_var("TEST_VYCTOR_API_KEY");
}
// The local provider yields an empty key rather than an error.
#[test]
fn test_local_provider_returns_empty_api_key() {
let config = EmbeddingConfig {
provider: EmbeddingProvider::Local,
..Default::default()
};
assert_eq!(config.get_api_key().unwrap(), "");
}
// --- Individual default values ---
#[test]
fn test_default_openai_base_url() {
let config = OpenAIConfig::default();
assert_eq!(config.base_url, "https://api.openai.com/v1");
}
#[test]
fn test_default_voyage_base_url() {
let config = VoyageConfig::default();
assert_eq!(config.base_url, "https://api.voyageai.com/v1");
}
#[test]
fn test_default_batch_size() {
let config = EmbeddingConfig::default();
assert_eq!(config.batch_size, 100);
}
// Cloning preserves the checked fields.
#[test]
fn test_clone_config() {
let config = VyctorConfig::default();
let cloned = config.clone();
assert_eq!(config.indexing.chunk_size, cloned.indexing.chunk_size);
assert_eq!(config.embedding.get_model(), cloned.embedding.get_model());
}
// --- Watch configuration ---
#[test]
fn test_default_watch_config() {
let config = WatchConfig::default();
assert!(!config.auto_start);
assert_eq!(config.debounce_ms, 300);
}
#[test]
fn test_watch_config_validation() {
let config = WatchConfig::default();
assert!(config.validate().is_ok());
}
// A zero debounce interval is rejected.
#[test]
fn test_watch_config_invalid_zero_debounce() {
let config = WatchConfig {
auto_start: false,
debounce_ms: 0,
};
assert!(config.validate().is_err());
}
// The root default carries the watch defaults.
#[test]
fn test_vyctor_config_includes_watch() {
let config = VyctorConfig::default();
assert!(!config.watch.auto_start);
assert_eq!(config.watch.debounce_ms, 300);
}
// An explicit [watch] section overrides both defaults.
#[test]
fn test_parse_config_with_watch_section() {
let toml = r#"
[indexing]
include = ["**/*.rs"]
exclude = []
chunk_size = 500
chunk_overlap = 50
[embedding]
provider = "local"
dimensions = 384
[embedding.local]
model = "test-model"
[watch]
auto_start = true
debounce_ms = 500
"#;
let config: VyctorConfig = toml::from_str(toml).unwrap();
assert!(config.watch.auto_start);
assert_eq!(config.watch.debounce_ms, 500);
}
// Omitting [watch] entirely falls back to the watch defaults.
#[test]
fn test_parse_config_without_watch_section() {
let toml = r#"
[indexing]
include = ["**/*.rs"]
exclude = []
chunk_size = 500
chunk_overlap = 50
[embedding]
provider = "local"
dimensions = 384
[embedding.local]
model = "test-model"
"#;
let config: VyctorConfig = toml::from_str(toml).unwrap();
assert!(!config.watch.auto_start);
assert_eq!(config.watch.debounce_ms, 300);
}
// get_model follows the active provider as it is switched.
#[test]
fn test_get_model_returns_correct_provider_model() {
let mut config = EmbeddingConfig::default();
config.provider = EmbeddingProvider::Local;
config.local.model = "local-model".to_string();
assert_eq!(config.get_model(), "local-model");
config.provider = EmbeddingProvider::OpenAI;
config.openai.model = "openai-model".to_string();
assert_eq!(config.get_model(), "openai-model");
config.provider = EmbeddingProvider::Voyage;
config.voyage.model = "voyage-model".to_string();
assert_eq!(config.get_model(), "voyage-model");
}
}