use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fs;
use std::path::Path;
/// Location of the configuration file, expressed relative to a project root directory.
pub const DEFAULT_CONFIG_FILE: &str = ".leindex/config.toml";
/// Top-level project configuration, made up of one section per concern.
///
/// `Default` is derived, so the default value is assembled from each section's own
/// `Default` implementation.
///
/// NOTE(review): no `#[serde(default)]` is present, so a config file presumably has to
/// spell out every section and field to deserialize — confirm whether partial files
/// should be accepted.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct ProjectConfig {
    /// Language / file-extension selection.
    pub languages: LanguageConfig,
    /// Directory, file-name and path exclusion patterns.
    pub exclusions: ExclusionConfig,
    /// Token budget settings.
    pub tokens: TokenConfig,
    /// Storage backend settings.
    pub storage: StorageConfig,
    /// Memory settings.
    pub memory: MemoryConfig,
}
impl ProjectConfig {
pub fn load<P: AsRef<Path>>(project_path: P) -> Result<Self> {
let config_path = project_path.as_ref().join(DEFAULT_CONFIG_FILE);
if !config_path.exists() {
return Ok(ProjectConfig::default());
}
let content = fs::read_to_string(&config_path)
.with_context(|| format!("Failed to read config file: {:?}", config_path))?;
let config: ProjectConfig = toml::from_str(&content)
.with_context(|| format!("Failed to parse config file: {:?}", config_path))?;
Ok(config)
}
pub fn save<P: AsRef<Path>>(&self, project_path: P) -> Result<()> {
let config_dir = project_path.as_ref().join(".leindex");
fs::create_dir_all(&config_dir)
.with_context(|| format!("Failed to create config directory: {:?}", config_dir))?;
let config_path = config_dir.join("config.toml");
let toml_string =
toml::to_string_pretty(self).context("Failed to serialize configuration")?;
fs::write(&config_path, toml_string)
.with_context(|| format!("Failed to write config file: {:?}", config_path))?;
Ok(())
}
pub fn enabled_extensions(&self) -> HashSet<String> {
self.languages.enabled_extensions()
}
pub fn should_exclude<P: AsRef<Path>>(&self, path: P) -> bool {
let path = path.as_ref();
if let Some(parent) = path.parent() {
if let Some(dir_name) = parent.file_name() {
let dir = dir_name.to_string_lossy();
for pattern in &self.exclusions.directory_patterns {
if pattern.matches(&dir) {
return true;
}
}
}
}
if let Some(file_name) = path.file_name() {
let name = file_name.to_string_lossy();
for pattern in &self.exclusions.file_patterns {
if pattern.matches(&name) {
return true;
}
}
}
let path_str = path.to_string_lossy();
for pattern in &self.exclusions.path_patterns {
if pattern.matches(&path_str) {
return true;
}
}
false
}
pub fn token_budget(&self) -> usize {
self.tokens.default_budget
}
pub fn max_context_tokens(&self) -> usize {
self.tokens.max_context
}
}
/// Controls which source-file extensions count as enabled.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LanguageConfig {
    /// When `true`, all built-in extensions are candidates and `enabled` is not consulted.
    pub enable_all: bool,
    /// Language names or file extensions to enable; consulted only when `enable_all` is
    /// `false`.
    pub enabled: Vec<String>,
    /// Extensions to keep out of the enabled set (compared literally against extension
    /// strings).
    pub disabled: Vec<String>,
}
impl Default for LanguageConfig {
fn default() -> Self {
Self {
enable_all: true,
enabled: Vec::new(),
disabled: vec!["vim".to_string()], }
}
}
impl LanguageConfig {
    /// Every file extension this configuration knows about.
    const ALL_EXTENSIONS: &'static [&'static str] = &[
        "rs", "py", "js", "jsx", "mjs", "cjs", "ts", "tsx", "mts", "cts", "go", "java", "cpp",
        "cc", "cxx", "c", "h", "hpp", "cs", "rb", "php", "lua", "scala", "sc", "sh", "bash",
        "json",
    ];

    /// Computes the set of enabled file extensions.
    ///
    /// * With `enable_all`, the result is every known extension minus those in `disabled`.
    /// * Otherwise each `enabled` entry is resolved as a language name first (matched
    ///   case-insensitively) and contributes all extensions of that language. An entry
    ///   that is not a language name but is a known extension contributes itself.
    ///   Unknown entries are ignored.
    ///
    /// Language names take precedence so that names which are also extensions (`"c"`,
    /// `"cpp"`, `"scala"`, `"bash"`) enable the whole language rather than a single
    /// extension. `disabled` is applied to every candidate, on both paths.
    pub fn enabled_extensions(&self) -> HashSet<String> {
        // Borrow-compare instead of allocating a `String` for every lookup.
        let is_disabled = |ext: &str| self.disabled.iter().any(|entry| entry.as_str() == ext);

        let mut extensions = HashSet::new();
        if self.enable_all {
            extensions.extend(
                Self::ALL_EXTENSIONS
                    .iter()
                    .copied()
                    .filter(|ext| !is_disabled(*ext))
                    .map(String::from),
            );
            return extensions;
        }

        for entry in &self.enabled {
            let mapped = Self::language_to_extensions(entry);
            if !mapped.is_empty() {
                extensions.extend(
                    mapped
                        .into_iter()
                        .filter(|ext| !is_disabled(*ext))
                        .map(String::from),
                );
            } else if Self::ALL_EXTENSIONS.contains(&entry.as_str())
                && !is_disabled(entry.as_str())
            {
                extensions.insert(entry.clone());
            }
        }
        extensions
    }

    /// Maps a language name (case-insensitive) to its file extensions.
    ///
    /// Returns an empty vector for names that are not recognised.
    fn language_to_extensions(lang: &str) -> Vec<&'static str> {
        match lang.to_lowercase().as_str() {
            "rust" => vec!["rs"],
            "python" => vec!["py"],
            "javascript" => vec!["js", "jsx", "mjs", "cjs"],
            "typescript" => vec!["ts", "tsx", "mts", "cts"],
            "go" => vec!["go"],
            "java" => vec!["java"],
            "cpp" | "c++" => vec!["cpp", "cc", "cxx", "c", "h", "hpp"],
            "c" => vec!["c", "h"],
            "csharp" | "c#" => vec!["cs"],
            "ruby" => vec!["rb"],
            "php" => vec!["php"],
            "lua" => vec!["lua"],
            "scala" => vec!["scala", "sc"],
            "bash" | "shell" => vec!["sh", "bash"],
            "json" => vec!["json"],
            _ => vec![],
        }
    }

    /// Reports whether `ext` is part of the set returned by
    /// [`LanguageConfig::enabled_extensions`].
    pub fn is_extension_enabled(&self, ext: &str) -> bool {
        self.enabled_extensions().contains(ext)
    }
}
/// Patterns describing what is excluded, grouped by what each pattern is matched against.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ExclusionConfig {
    /// Patterns matched against individual directory names.
    pub directory_patterns: Vec<StringPattern>,
    /// Patterns matched against the file name (last path component).
    pub file_patterns: Vec<StringPattern>,
    /// Patterns matched against the complete path string.
    pub path_patterns: Vec<StringPattern>,
}
impl Default for ExclusionConfig {
fn default() -> Self {
Self {
directory_patterns: vec![
".git".into(),
".hg".into(),
".svn".into(),
"target".into(),
"build".into(),
"dist".into(),
"out".into(),
".next".into(),
"coverage".into(),
"node_modules".into(),
"vendor".into(),
"bower_components".into(),
".venv".into(),
"venv".into(),
"env".into(),
"__pycache__".into(),
".tox".into(),
".mypy_cache".into(),
".pytest_cache".into(),
".ruff_cache".into(),
".idea".into(),
".vscode".into(),
".leindex".into(),
],
file_patterns: vec![
"*.min.js".into(),
"*.min.css".into(),
"*.pb.go".into(), "*.generated.rs".into(),
"*.bundle.js".into(), "*.chunk.js".into(), ],
path_patterns: vec![
"*/target/*".into(),
"*/node_modules/*".into(),
"*/dist/*".into(),
"*/out/*".into(),
],
}
}
}
impl ExclusionConfig {
pub fn should_exclude(&self, path: &str) -> bool {
for segment in path.split('/') {
for pattern in &self.directory_patterns {
if pattern.matches(segment) {
return true;
}
}
}
if let Some(filename) = path.rsplit('/').next() {
for pattern in &self.file_patterns {
if pattern.matches(filename) {
return true;
}
}
}
for pattern in &self.path_patterns {
if pattern.matches(path) {
return true;
}
}
false
}
}
/// A textual match pattern in which `*` acts as a wildcard.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct StringPattern {
    /// The raw pattern text.
    pub pattern: String,
}
impl From<&str> for StringPattern {
fn from(s: &str) -> Self {
Self {
pattern: s.to_string(),
}
}
}
impl From<String> for StringPattern {
fn from(s: String) -> Self {
Self { pattern: s }
}
}
impl StringPattern {
    /// Tests `text` against this pattern.
    ///
    /// `*` matches any run of characters (including none, and including `/`); every other
    /// character must match literally. A pattern without `*` matches only the identical
    /// string. The text before the first `*` must be a prefix of `text`, the text after
    /// the last `*` must be a suffix, and the literals in between must occur in order,
    /// without overlapping, in what remains.
    pub fn matches(&self, text: &str) -> bool {
        let mut pieces = self.pattern.split('*');

        // `split` always yields at least one piece; the fallback is never taken.
        let head = pieces.next().unwrap_or("");
        let tail = match pieces.next_back() {
            Some(tail) => tail,
            // Only one piece means the pattern holds no wildcard: exact comparison.
            None => return self.pattern == text,
        };

        // Peel off the anchored prefix and suffix first so they can neither overlap each
        // other nor be consumed by an inner literal.
        let mut rest = match text
            .strip_prefix(head)
            .and_then(|without_head| without_head.strip_suffix(tail))
        {
            Some(rest) => rest,
            None => return false,
        };

        // Inner literals must appear left to right; taking the leftmost occurrence of each
        // leaves the most room for the literals that follow.
        for piece in pieces.filter(|piece| !piece.is_empty()) {
            match rest.find(piece) {
                Some(pos) => rest = &rest[pos + piece.len()..],
                None => return false,
            }
        }
        true
    }
}
/// Token and result-count limits.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TokenConfig {
    /// Token budget used by default.
    pub default_budget: usize,
    /// Maximum number of context tokens.
    pub max_context: usize,
    /// Lower bound on the number of results — presumably per query; confirm against callers.
    pub min_results: usize,
    /// Upper bound on the number of results — presumably per query; confirm against callers.
    pub max_results: usize,
}
impl Default for TokenConfig {
fn default() -> Self {
Self {
default_budget: 2000,
max_context: 5000,
min_results: 5,
max_results: 20,
}
}
}
/// Settings for the storage layer.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct StorageConfig {
    /// Which storage backend to use.
    pub backend: StorageBackend,
    /// Optional database path; presumably a location is chosen elsewhere when `None` —
    /// confirm against the storage code.
    pub db_path: Option<String>,
    /// WAL toggle — presumably write-ahead logging of the database; confirm.
    pub wal_enabled: bool,
    /// Optional cache size, expressed in pages.
    pub cache_size_pages: Option<usize>,
    /// Optional connection timeout, expressed in seconds.
    pub connection_timeout_secs: Option<u64>,
}
impl Default for StorageConfig {
fn default() -> Self {
Self {
backend: StorageBackend::SQLite,
db_path: None, wal_enabled: true,
cache_size_pages: Some(10000),
connection_timeout_secs: Some(30),
}
}
}
/// The available storage backends.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub enum StorageBackend {
    /// SQLite backend; carries no settings of its own.
    SQLite,
    /// Turso backend, addressed by URL.
    Turso {
        /// URL of the database.
        database_url: String,
        /// Optional authentication token.
        ///
        /// NOTE(review): this value is included in the derived `Debug` output and in the
        /// serialized configuration — confirm that is acceptable for a credential.
        auth_token: Option<String>,
    },
}
/// Memory-related settings.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct MemoryConfig {
    /// Spill threshold — presumably a fraction of `max_memory_mb` in `0.0..=1.0`; confirm
    /// against the code that reads it.
    pub spill_threshold: f64,
    /// Whether spilling happens automatically.
    pub auto_spill: bool,
    /// Memory ceiling, expressed in megabytes.
    pub max_memory_mb: usize,
}
impl Default for MemoryConfig {
fn default() -> Self {
Self {
spill_threshold: 0.9,
auto_spill: true,
max_memory_mb: 8192, }
}
}
/// Error categories for configuration handling.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An underlying I/O failure; converts automatically from `std::io::Error`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Serialization failed; the payload is the message to display.
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// Parsing failed; the payload is the message to display.
    #[error("Parse error: {0}")]
    Parse(String),

    /// The configuration is invalid; the payload is the message to display.
    #[error("Invalid configuration: {0}")]
    Invalid(String),
}
// Unit tests for the configuration defaults, extension resolution, exclusion rules,
// pattern matching and TOML round-tripping.
#[cfg(test)]
mod tests {
use super::*;
// The default project configuration enables all languages and uses a 2000-token budget.
#[test]
fn test_default_config() {
let config = ProjectConfig::default();
assert!(config.languages.enable_all);
assert_eq!(config.tokens.default_budget, 2000);
}
// The default language configuration yields `rs` and `py` but not `vim`.
#[test]
fn test_language_extensions() {
let config = LanguageConfig::default();
let exts = config.enabled_extensions();
assert!(exts.contains("rs"));
assert!(exts.contains("py"));
assert!(!exts.contains("vim"));
}
// Paths that start with an excluded directory are rejected; ordinary source paths are kept.
#[test]
fn test_exclusion_patterns() {
let config = ExclusionConfig::default();
assert!(config.should_exclude("target/main.rs"));
assert!(config.should_exclude("node_modules/package/index.js"));
assert!(!config.should_exclude("src/main.rs"));
}
// Each default build-output, cache, virtualenv and editor directory excludes its contents.
#[test]
fn test_exclusion_build_output_dirs() {
let config = ExclusionConfig::default();
assert!(config.should_exclude("out/bundle.js"));
assert!(config.should_exclude("dist/index.js"));
assert!(config.should_exclude("build/main.js"));
assert!(config.should_exclude(".next/server/app.js"));
assert!(config.should_exclude("coverage/lcov.info"));
assert!(config.should_exclude("__pycache__/module.pyc"));
assert!(config.should_exclude(".mypy_cache/stubs.json"));
assert!(config.should_exclude(".pytest_cache/v/cache.json"));
assert!(config.should_exclude(".ruff_cache/data.json"));
assert!(config.should_exclude(".tox/py39/lib/site.py"));
assert!(config.should_exclude(".idea/workspace.xml"));
assert!(config.should_exclude(".vscode/settings.json"));
assert!(config.should_exclude(".venv/lib/python3.12/os.py"));
assert!(config.should_exclude("venv/lib/python3.12/os.py"));
assert!(!config.should_exclude("src/main.rs"));
assert!(!config.should_exclude("lib/utils.ts"));
}
// Lock files and manifests are kept, while minified, bundled and generated files are excluded.
#[test]
fn test_exclusion_minified_and_generated_files() {
let config = ExclusionConfig::default();
assert!(!config.should_exclude("Cargo.lock"));
assert!(!config.should_exclude("frontend/package-lock.json"));
assert!(!config.should_exclude("api/yarn.lock"));
assert!(!config.should_exclude("packages/web/pnpm-lock.yaml"));
assert!(!config.should_exclude("backend/composer.lock"));
assert!(!config.should_exclude("python/Pipfile.lock"));
assert!(!config.should_exclude("python/poetry.lock"));
assert!(!config.should_exclude("ruby/Gemfile.lock"));
assert!(config.should_exclude("app.min.js"));
assert!(config.should_exclude("styles.min.css"));
assert!(config.should_exclude("service.pb.go"));
assert!(config.should_exclude("schema.generated.rs"));
assert!(config.should_exclude("vendor.bundle.js"));
assert!(config.should_exclude("main.chunk.js"));
assert!(!config.should_exclude("package.json"));
assert!(!config.should_exclude("Cargo.toml"));
assert!(!config.should_exclude("pyproject.toml"));
assert!(!config.should_exclude("app.js"));
assert!(!config.should_exclude("service.go"));
}
// Every built-in extension is present in the default enabled set.
#[test]
fn test_language_extensions_complete() {
let config = LanguageConfig::default();
let exts = config.enabled_extensions();
for ext in &[
"rs", "py", "js", "jsx", "mjs", "cjs", "ts", "tsx", "mts", "cts", "go", "java", "cpp",
"cc", "cxx", "c", "h", "hpp", "cs", "rb", "php", "lua", "scala", "sc", "sh", "bash",
"json",
] {
assert!(exts.contains(*ext), "Extension '{}' should be enabled", ext);
}
}
// Language names map to at least the listed extensions.
#[test]
fn test_language_to_extensions_complete() {
let cases = vec![
("rust", vec!["rs"]),
("python", vec!["py"]),
("javascript", vec!["js", "jsx", "mjs", "cjs"]),
("typescript", vec!["ts", "tsx", "mts", "cts"]),
("csharp", vec!["cs"]),
("c#", vec!["cs"]),
("bash", vec!["sh", "bash"]),
("scala", vec!["scala", "sc"]),
("json", vec!["json"]),
];
for (name, expected_exts) in cases {
let exts = LanguageConfig::language_to_extensions(name);
for ext in expected_exts {
assert!(
exts.contains(&ext),
"Language '{}' should map to extension '{}'",
name,
ext
);
}
}
}
// A leading-wildcard pattern matches by suffix, and a lone `*` matches anything.
#[test]
fn test_string_pattern() {
let pattern = StringPattern::from("*.min.js");
assert!(pattern.matches("file.min.js"));
assert!(pattern.matches("path/to/file.min.js"));
assert!(!pattern.matches("file.js"));
let wildcard = StringPattern::from("*");
assert!(wildcard.matches("anything"));
}
// The default configuration survives a TOML serialize/deserialize round trip.
#[test]
fn test_config_serialization() {
let config = ProjectConfig::default();
let toml_string = toml::to_string(&config).unwrap();
println!("{}", toml_string);
let deserialized: ProjectConfig = toml::from_str(&toml_string).unwrap();
assert_eq!(deserialized.tokens.default_budget, 2000);
}
}