use globset::{Glob, GlobSet, GlobSetBuilder};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use thiserror::Error;
/// File name of the project configuration file that is searched for in the
/// index root and its parent directories.
pub const CONFIG_FILE_NAME: &str = ".sqry-config.toml";
/// Errors produced while locating, reading, or parsing a project configuration file.
#[derive(Debug, Error)]
pub enum ConfigError {
/// No configuration file was found during the directory walk.
#[error("Configuration file not found")]
NotFound,
/// The file at the given path was read but is not valid configuration TOML;
/// the `String` carries the parser's error message.
#[error("Failed to parse config at {0}: {1}")]
ParseError(PathBuf, String),
/// The file at the given path could not be read.
#[error("Failed to read config at {0}: {1}")]
IoError(PathBuf, std::io::Error),
}
/// Top-level project configuration, deserialized from the TOML config file.
///
/// Every section is optional: a missing section falls back to its `Default`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(default)]
pub struct ProjectConfig {
/// `[ignore]` section: glob patterns for paths to ignore.
#[serde(default)]
pub ignore: IgnoreConfig,
/// `[include]` section: glob patterns that override an ignore match.
#[serde(default)]
pub include: IncludeConfig,
/// `[languages]` section: extension and file-name overrides for language detection.
#[serde(default)]
pub languages: LanguageConfig,
/// `[indexing]` section: limits and feature toggles.
#[serde(default)]
pub indexing: IndexingConfig,
/// `[cache]` section: cache directory and persistence flag.
#[serde(default)]
pub cache: CacheConfig,
}
/// The `[ignore]` configuration section.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct IgnoreConfig {
/// Gitignore-style glob patterns; when absent, the built-in list from
/// `default_ignore_patterns` is used.
#[serde(default = "default_ignore_patterns")]
pub patterns: Vec<String>,
}
impl Default for IgnoreConfig {
    /// Builds an ignore section populated with the built-in pattern list.
    fn default() -> Self {
        let patterns = default_ignore_patterns();
        Self { patterns }
    }
}
/// Returns the built-in ignore patterns (dependency, build-output, VCS,
/// cache, virtual-env and IDE directories, plus minified JavaScript).
fn default_ignore_patterns() -> Vec<String> {
    const BUILT_IN: [&str; 16] = [
        "node_modules/**",
        "target/**",
        "dist/**",
        "*.min.js",
        "vendor/**",
        ".git/**",
        "__pycache__/**",
        ".pytest_cache/**",
        ".mypy_cache/**",
        ".tox/**",
        ".venv/**",
        "venv/**",
        ".gradle/**",
        ".idea/**",
        ".vs/**",
        ".vscode/**",
    ];
    BUILT_IN.iter().map(|glob| (*glob).to_string()).collect()
}
/// The `[include]` configuration section.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(default)]
pub struct IncludeConfig {
/// Glob patterns that rescue a path from an ignore match; empty by default.
#[serde(default)]
pub patterns: Vec<String>,
}
/// The `[languages]` configuration section: user overrides for language detection.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(default)]
pub struct LanguageConfig {
/// Maps a file extension (as returned by `Path::extension`, i.e. without
/// the leading dot) to a language name.
#[serde(default)]
pub extensions: HashMap<String, String>,
/// Maps a file-name glob pattern (e.g. `Jenkinsfile`) to a language name.
#[serde(default)]
pub files: HashMap<String, String>,
}
/// The `[indexing]` configuration section.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct IndexingConfig {
/// Maximum file size; defaults to 10_485_760.
/// NOTE(review): presumably bytes (10 MiB) — confirm against the consumer.
#[serde(default = "default_max_file_size")]
pub max_file_size: u64,
/// Maximum depth; defaults to 100.
/// NOTE(review): presumably directory traversal depth — confirm against the consumer.
#[serde(default = "default_max_depth")]
pub max_depth: u32,
/// Toggle for scope extraction; defaults to `true`.
#[serde(default = "default_true")]
pub enable_scope_extraction: bool,
/// Toggle for relation extraction; defaults to `true`.
#[serde(default = "default_true")]
pub enable_relation_extraction: bool,
/// Extra directory names appended to the built-in ignored-directory list
/// by `ProjectConfig::effective_ignored_dirs`.
#[serde(default)]
pub additional_ignored_dirs: Vec<String>,
}
impl Default for IndexingConfig {
    /// Builds the indexing section with the same values serde uses for
    /// missing fields: default size and depth limits, both extraction
    /// toggles on, and no extra ignored directories.
    fn default() -> Self {
        let max_file_size = default_max_file_size();
        let max_depth = default_max_depth();
        Self {
            max_file_size,
            max_depth,
            enable_scope_extraction: true,
            enable_relation_extraction: true,
            additional_ignored_dirs: vec![],
        }
    }
}
/// Default for `IndexingConfig::max_file_size`: 10 * 1024 * 1024 (10_485_760).
fn default_max_file_size() -> u64 {
    10 * 1024 * 1024
}
/// Default for `IndexingConfig::max_depth`.
fn default_max_depth() -> u32 {
    const DEPTH_LIMIT: u32 = 100;
    DEPTH_LIMIT
}
/// Serde default helper for boolean options that are enabled unless the
/// config file says otherwise.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// The `[cache]` configuration section.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CacheConfig {
/// Cache directory; defaults to `.sqry-cache`.
/// NOTE(review): how a relative value is resolved is not visible here — confirm against the consumer.
#[serde(default = "default_cache_directory")]
pub directory: String,
/// Persistence flag; defaults to `true`.
#[serde(default = "default_true")]
pub persistent: bool,
}
impl Default for CacheConfig {
    /// Builds the cache section with the default directory and persistence enabled.
    fn default() -> Self {
        let directory = default_cache_directory();
        Self {
            directory,
            persistent: true,
        }
    }
}
/// Default for `CacheConfig::directory`.
fn default_cache_directory() -> String {
    String::from(".sqry-cache")
}
impl ProjectConfig {
    /// Reads and parses the TOML configuration file at `path`.
    ///
    /// # Errors
    ///
    /// * [`ConfigError::IoError`] if the file cannot be read.
    /// * [`ConfigError::ParseError`] if the contents are not valid
    ///   configuration TOML.
    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
        let path = path.as_ref();
        let contents = std::fs::read_to_string(path)
            .map_err(|e| ConfigError::IoError(path.to_path_buf(), e))?;
        toml::from_str(&contents)
            .map_err(|e| ConfigError::ParseError(path.to_path_buf(), e.to_string()))
    }

    /// Loads the configuration that applies to `index_root`, never failing.
    ///
    /// The config file is searched for in `index_root` and then in each
    /// parent directory. If none is found the defaults are returned
    /// silently; if the file found is unreadable or malformed a warning is
    /// logged and the defaults are returned.
    pub fn load_from_index_root<P: AsRef<Path>>(index_root: P) -> Self {
        match Self::try_load_from_index_root(index_root.as_ref()) {
            Ok(config) => config,
            Err(ConfigError::NotFound) => Self::default(),
            Err(ConfigError::ParseError(path, err)) => {
                log::warn!(
                    "Malformed {} at {}: {}. Using defaults.",
                    CONFIG_FILE_NAME,
                    path.display(),
                    err
                );
                Self::default()
            }
            Err(ConfigError::IoError(path, err)) => {
                log::warn!(
                    "Cannot read {} at {}: {}. Using defaults.",
                    CONFIG_FILE_NAME,
                    path.display(),
                    err
                );
                Self::default()
            }
        }
    }

    /// Walks from `index_root` up to the filesystem root and loads the first
    /// config file found.
    ///
    /// # Errors
    ///
    /// [`ConfigError::NotFound`] if no directory on the way up contains the
    /// file; otherwise whatever [`ProjectConfig::load`] returns for the
    /// first file found.
    fn try_load_from_index_root(index_root: &Path) -> Result<Self, ConfigError> {
        // A relative root such as `.` or `a/b` has no real parent chain
        // (`Path::parent` bottoms out at the empty path), so resolve it
        // first; otherwise the directories above the working directory
        // would never be searched. Absolute roots are used unchanged.
        let search_root = if index_root.is_relative() {
            index_root
                .canonicalize()
                .or_else(|_| std::path::absolute(index_root))
                .unwrap_or_else(|_| index_root.to_path_buf())
        } else {
            index_root.to_path_buf()
        };

        // `is_file` rather than `exists`: a directory that happens to carry
        // the config name must not stop the walk with a read error.
        let found = search_root
            .ancestors()
            .map(|dir| dir.join(CONFIG_FILE_NAME))
            .find(|candidate| candidate.is_file());

        match found {
            Some(config_path) => Self::load(&config_path),
            None => Err(ConfigError::NotFound),
        }
    }

    /// Returns the built-in ignored directory names followed by the
    /// user-configured `indexing.additional_ignored_dirs`.
    #[must_use]
    pub fn effective_ignored_dirs(&self) -> Vec<&str> {
        use crate::project::path_utils::DEFAULT_IGNORED_DIRS;
        let mut dirs: Vec<&str> = DEFAULT_IGNORED_DIRS.to_vec();
        dirs.extend(
            self.indexing
                .additional_ignored_dirs
                .iter()
                .map(String::as_str),
        );
        dirs
    }

    /// Reports whether `path` is excluded by the ignore patterns.
    ///
    /// A path is ignored when it matches an ignore pattern and does not
    /// match any include pattern. If the ignore patterns fail to compile,
    /// nothing is ignored; if the include patterns fail to compile, the
    /// ignore match stands. Both cases log a warning.
    ///
    /// The glob sets are compiled on every call.
    #[must_use]
    pub fn is_ignored(&self, path: &Path) -> bool {
        let normalized = normalize_path_for_matching(path);

        let ignore_set = match build_glob_set(&self.ignore.patterns) {
            Ok(set) => set,
            Err(e) => {
                log::warn!("Invalid ignore pattern: {e}");
                return false;
            }
        };
        if !ignore_set.is_match(&normalized) {
            return false;
        }

        // Only compile the include set once an ignore pattern has matched.
        if self.include.patterns.is_empty() {
            return true;
        }
        match build_glob_set(&self.include.patterns) {
            Ok(include_set) => !include_set.is_match(&normalized),
            Err(e) => {
                log::warn!("Invalid include pattern: {e}");
                true
            }
        }
    }

    /// Returns the user-configured language for `path`, if any.
    ///
    /// File-name patterns in `languages.files` take precedence over
    /// `languages.extensions`. When several file-name patterns match, the
    /// choice is deterministic: a pattern equal to the file name wins,
    /// otherwise the lexicographically smallest matching pattern is used.
    #[must_use]
    pub fn language_for_path(&self, path: &Path) -> Option<&str> {
        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
            // `HashMap` iteration order is unspecified, so rank the matches
            // instead of returning whichever one is visited first.
            let best = self
                .languages
                .files
                .iter()
                .filter(|(pattern, _)| glob_match_filename(pattern, filename))
                .min_by_key(|&(pattern, _)| (pattern.as_str() != filename, pattern.as_str()));
            if let Some((_, lang)) = best {
                return Some(lang.as_str());
            }
        }
        if let Some(ext) = path.extension().and_then(|e| e.to_str())
            && let Some(lang) = self.languages.extensions.get(ext)
        {
            return Some(lang.as_str());
        }
        None
    }
}
/// Compiles gitignore-style `patterns` into a single [`GlobSet`].
///
/// Each pattern is first expanded by `normalize_gitignore_pattern`, and
/// every resulting glob is added to the set.
///
/// # Errors
///
/// Returns the first glob compilation error, or the error from building
/// the set.
fn build_glob_set(patterns: &[String]) -> Result<GlobSet, globset::Error> {
    let mut builder = GlobSetBuilder::new();
    let expanded = patterns
        .iter()
        .flat_map(|raw| normalize_gitignore_pattern(raw));
    for glob_text in expanded {
        builder.add(Glob::new(&glob_text)?);
    }
    builder.build()
}
/// Expands one gitignore-style pattern into the glob(s) that implement it.
///
/// * A leading `/` anchors the pattern at the root; the rest is handled by
///   `normalize_rooted_pattern`.
/// * A leading `**/` is passed through unchanged.
/// * A pattern with a `/` in its interior is root-relative and kept as is.
/// * Any other pattern may match at any depth and gets a `**/` prefix.
/// * A trailing `/` yields two globs: the directory itself and its contents.
fn normalize_gitignore_pattern(pattern: &str) -> Vec<String> {
    if let Some(rooted) = pattern.strip_prefix('/') {
        return normalize_rooted_pattern(rooted);
    }
    if pattern.starts_with("**/") {
        return vec![pattern.to_string()];
    }

    // A trailing `/**` or `/` does not count as an interior separator.
    let core = match pattern.strip_suffix("/**") {
        Some(head) => head,
        None => pattern.strip_suffix('/').unwrap_or(pattern),
    };
    let prefix = if core.contains('/') { "" } else { "**/" };

    if pattern.ends_with('/') {
        let dir_name = pattern.trim_end_matches('/');
        vec![
            format!("{prefix}{dir_name}"),
            format!("{prefix}{dir_name}/**"),
        ]
    } else {
        vec![format!("{prefix}{pattern}")]
    }
}
/// Expands a root-anchored pattern (leading `/` already removed) into globs.
///
/// Patterns ending in `/**` or whose last segment contains a glob
/// metacharacter are kept verbatim. A trailing `/` yields the directory and
/// its contents. Otherwise a name-based heuristic decides: a last segment
/// that looks like a file is kept verbatim, anything else is treated as a
/// directory and also gets a `<pattern>/**` glob.
fn normalize_rooted_pattern(pattern: &str) -> Vec<String> {
    let verbatim = || vec![pattern.to_string()];
    let with_contents = |dir: &str| vec![dir.to_string(), format!("{dir}/**")];

    if pattern.ends_with("/**") {
        return verbatim();
    }
    if pattern.ends_with('/') {
        return with_contents(pattern.trim_end_matches('/'));
    }

    let last_segment = pattern.rsplit(['/', '\\']).next().unwrap_or(pattern);
    if last_segment.contains(['*', '?', '[']) {
        return verbatim();
    }

    let looks_like_file = match last_segment.strip_prefix('.') {
        // Dotfiles: known file-like suffixes (`.gitignore`, `.npmrc`,
        // `.gitattributes`, `.gitmodules`, `.editorconfig`) or a second dot
        // (`.env.local`). A bare `.config` counts as a directory.
        Some(name) => {
            const FILE_SUFFIXES: [&str; 4] = ["ignore", "rc", "attributes", "modules"];
            FILE_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
                || (name != "config" && name.ends_with("config"))
                || name.contains('.')
        }
        // Regular names: a file needs a non-empty stem and extension.
        None => last_segment
            .rfind('.')
            .is_some_and(|dot| dot > 0 && dot + 1 < last_segment.len()),
    };

    if looks_like_file {
        verbatim()
    } else {
        with_contents(pattern)
    }
}
/// Tests `filename` against the glob `pattern`.
///
/// If `pattern` is not a valid glob, it is compared to `filename` literally.
fn glob_match_filename(pattern: &str, filename: &str) -> bool {
    if let Ok(glob) = Glob::new(pattern) {
        glob.compile_matcher().is_match(filename)
    } else {
        pattern == filename
    }
}
/// Converts `path` into the forward-slash, root-relative string that the
/// glob sets are matched against.
///
/// Backslashes become `/`, any leading `./` components are removed, and a
/// single leading `/` is stripped. Dropping `./` matters for root-anchored
/// globs: `build/**` must match `./build/out.log` just as it matches
/// `build/out.log`.
fn normalize_path_for_matching(path: &Path) -> String {
    let unified = path.to_string_lossy().replace('\\', "/");
    let mut rest = unified.as_str();
    // Only exact `./` prefixes are removed, so `../x` and `.git/x` are untouched.
    while let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }
    rest.strip_prefix('/').unwrap_or(rest).to_string()
}
// Unit tests for configuration defaults, file loading, language lookup,
// and gitignore-style pattern normalization and matching.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// `ProjectConfig::default()` carries the documented default for every section.
#[test]
fn test_default_config() {
let config = ProjectConfig::default();
assert!(!config.ignore.patterns.is_empty());
assert!(config.include.patterns.is_empty());
assert!(config.languages.extensions.is_empty());
assert_eq!(config.indexing.max_file_size, 10_485_760);
assert_eq!(config.indexing.max_depth, 100);
assert!(config.indexing.enable_scope_extraction);
assert!(config.indexing.enable_relation_extraction);
assert_eq!(config.cache.directory, ".sqry-cache");
assert!(config.cache.persistent);
}
// A config file that sets every section round-trips into the struct.
#[test]
fn test_load_config_from_file() {
let temp = TempDir::new().unwrap();
let config_path = temp.path().join(CONFIG_FILE_NAME);
let toml_content = r#"
[ignore]
patterns = ["custom/**", "*.bak"]
[include]
patterns = ["custom/important/**"]
[languages]
extensions = { "jsx" = "javascript" }
files = { "Jenkinsfile" = "groovy" }
[indexing]
max_file_size = 5242880
max_depth = 50
enable_scope_extraction = false
additional_ignored_dirs = ["my_vendor"]
[cache]
directory = ".my-cache"
persistent = false
"#;
std::fs::write(&config_path, toml_content).unwrap();
let config = ProjectConfig::load(&config_path).unwrap();
assert_eq!(config.ignore.patterns, vec!["custom/**", "*.bak"]);
assert_eq!(config.include.patterns, vec!["custom/important/**"]);
assert_eq!(
config.languages.extensions.get("jsx"),
Some(&"javascript".to_string())
);
assert_eq!(
config.languages.files.get("Jenkinsfile"),
Some(&"groovy".to_string())
);
assert_eq!(config.indexing.max_file_size, 5_242_880);
assert_eq!(config.indexing.max_depth, 50);
assert!(!config.indexing.enable_scope_extraction);
assert_eq!(config.indexing.additional_ignored_dirs, vec!["my_vendor"]);
assert_eq!(config.cache.directory, ".my-cache");
assert!(!config.cache.persistent);
}
// A config placed two levels above the index root is found by the ancestor walk.
#[test]
fn test_load_config_ancestor_walk() {
let temp = TempDir::new().unwrap();
let nested = temp.path().join("level1/level2/level3");
std::fs::create_dir_all(&nested).unwrap();
let config_path = temp.path().join("level1").join(CONFIG_FILE_NAME);
std::fs::write(
&config_path,
r"
[indexing]
max_depth = 42
",
)
.unwrap();
let config = ProjectConfig::load_from_index_root(&nested);
assert_eq!(config.indexing.max_depth, 42);
}
// With no config file in the temp dir, the defaults are expected.
// NOTE(review): assumes no config file exists in any ancestor of the temp dir.
#[test]
fn test_load_config_not_found_uses_defaults() {
let temp = TempDir::new().unwrap();
let config = ProjectConfig::load_from_index_root(temp.path());
assert_eq!(config, ProjectConfig::default());
}
// Fields and sections missing from the file fall back to their defaults.
#[test]
fn test_partial_config_uses_defaults() {
let temp = TempDir::new().unwrap();
let config_path = temp.path().join(CONFIG_FILE_NAME);
std::fs::write(
&config_path,
r"
[indexing]
max_depth = 25
",
)
.unwrap();
let config = ProjectConfig::load(&config_path).unwrap();
assert_eq!(config.indexing.max_depth, 25);
assert_eq!(config.indexing.max_file_size, 10_485_760);
assert!(config.indexing.enable_scope_extraction);
assert_eq!(config.cache.directory, ".sqry-cache");
}
// Built-in ignored directories and user-supplied ones are both returned.
#[test]
fn test_effective_ignored_dirs() {
let mut config = ProjectConfig::default();
config.indexing.additional_ignored_dirs =
vec!["my_vendor".to_string(), "artifacts".to_string()];
let dirs = config.effective_ignored_dirs();
assert!(dirs.contains(&"node_modules"));
assert!(dirs.contains(&"target"));
assert!(dirs.contains(&"my_vendor"));
assert!(dirs.contains(&"artifacts"));
}
// Extension and file-name overrides resolve; unmapped paths yield `None`.
#[test]
fn test_language_for_path() {
let mut config = ProjectConfig::default();
config
.languages
.extensions
.insert("jsx".to_string(), "javascript".to_string());
config
.languages
.files
.insert("Jenkinsfile".to_string(), "groovy".to_string());
assert_eq!(
config.language_for_path(Path::new("src/App.jsx")),
Some("javascript")
);
assert_eq!(
config.language_for_path(Path::new("ci/Jenkinsfile")),
Some("groovy")
);
assert_eq!(config.language_for_path(Path::new("src/main.rs")), None);
}
// `*`, `?`, and literal patterns behave as globs against a bare file name.
#[test]
fn test_glob_match_filename() {
assert!(glob_match_filename("*.js", "app.js"));
assert!(!glob_match_filename("*.js", "app.ts"));
assert!(glob_match_filename("file?.txt", "file1.txt"));
assert!(!glob_match_filename("file?.txt", "file12.txt"));
assert!(glob_match_filename("Jenkinsfile", "Jenkinsfile"));
assert!(!glob_match_filename("Jenkinsfile", "Jenkinsfile.bak"));
}
// Default patterns ignore well-known directories at the top level only where expected.
#[test]
fn test_is_ignored_basic() {
let config = ProjectConfig::default();
assert!(config.is_ignored(Path::new("node_modules/foo.js")));
assert!(config.is_ignored(Path::new("target/debug/binary")));
assert!(config.is_ignored(Path::new(".git/config")));
assert!(config.is_ignored(Path::new("__pycache__/module.pyc")));
assert!(!config.is_ignored(Path::new("src/main.rs")));
assert!(!config.is_ignored(Path::new("lib/utils.js")));
}
// Default patterns also match when the ignored directory is nested.
#[test]
fn test_is_ignored_nested_paths() {
let config = ProjectConfig::default();
assert!(config.is_ignored(Path::new("packages/frontend/node_modules/react/index.js")));
assert!(config.is_ignored(Path::new("deep/nested/path/node_modules/pkg/lib.js")));
assert!(config.is_ignored(Path::new("crates/lib/target/release/libfoo.so")));
}
// Default patterns match absolute paths as well.
#[test]
fn test_is_ignored_absolute_paths() {
let config = ProjectConfig::default();
assert!(config.is_ignored(Path::new("/home/user/project/node_modules/pkg/index.js")));
assert!(config.is_ignored(Path::new("/tmp/build/target/debug/app")));
assert!(config.is_ignored(Path::new("/var/repo/.git/objects/pack/abc")));
}
// An include pattern rescues paths that an ignore pattern would exclude.
#[test]
fn test_is_ignored_include_overrides() {
let mut config = ProjectConfig::default();
config.ignore.patterns = vec!["vendor/**".to_string()];
config.include.patterns = vec!["vendor/internal/**".to_string()];
assert!(config.is_ignored(Path::new("vendor/external/lib.js")));
assert!(config.is_ignored(Path::new("vendor/third_party/pkg.py")));
assert!(!config.is_ignored(Path::new("vendor/internal/core.rs")));
assert!(!config.is_ignored(Path::new("vendor/internal/nested/utils.rs")));
}
// Extension-style patterns match files at any depth.
#[test]
fn test_is_ignored_extension_patterns() {
let mut config = ProjectConfig::default();
config.ignore.patterns = vec!["*.min.js".to_string(), "*.bak".to_string()];
assert!(config.is_ignored(Path::new("dist/app.min.js")));
assert!(config.is_ignored(Path::new("src/old.bak")));
assert!(config.is_ignored(Path::new("deeply/nested/file.min.js")));
assert!(!config.is_ignored(Path::new("src/app.js")));
}
// Pins the exact glob expansion for each gitignore pattern shape.
#[test]
fn test_normalize_gitignore_pattern() {
assert_eq!(
normalize_gitignore_pattern("node_modules/**"),
vec!["**/node_modules/**"]
);
assert_eq!(normalize_gitignore_pattern("*.js"), vec!["**/*.js"]);
assert_eq!(normalize_gitignore_pattern("target"), vec!["**/target"]);
assert_eq!(
normalize_gitignore_pattern("**/node_modules"),
vec!["**/node_modules"]
);
assert_eq!(
normalize_gitignore_pattern("/build"),
vec!["build", "build/**"]
);
assert_eq!(normalize_gitignore_pattern("/dist/**"), vec!["dist/**"]);
assert_eq!(
normalize_gitignore_pattern("/config.json"),
vec!["config.json"]
);
assert_eq!(normalize_gitignore_pattern("/*.txt"), vec!["*.txt"]);
assert_eq!(
normalize_gitignore_pattern("/build/"),
vec!["build", "build/**"]
);
assert_eq!(normalize_gitignore_pattern("docs/*.md"), vec!["docs/*.md"]);
assert_eq!(
normalize_gitignore_pattern("src/vendor"),
vec!["src/vendor"]
);
assert_eq!(
normalize_gitignore_pattern("build/"),
vec!["**/build", "**/build/**"]
);
}
// A compiled glob set matches unanchored patterns at any depth.
#[test]
fn test_build_glob_set() {
let patterns = vec!["node_modules/**".to_string(), "*.min.js".to_string()];
let glob_set = build_glob_set(&patterns).unwrap();
assert!(glob_set.is_match("src/node_modules/pkg/index.js"));
assert!(glob_set.is_match("app.min.js"));
assert!(glob_set.is_match("dist/bundle.min.js"));
assert!(!glob_set.is_match("src/main.rs"));
assert!(!glob_set.is_match("app.js"));
}
// A rooted directory pattern matches the directory and its contents, at the root only.
#[test]
fn test_rooted_pattern_matches_contents() {
let patterns = vec!["/build".to_string()];
let glob_set = build_glob_set(&patterns).unwrap();
assert!(glob_set.is_match("build"));
assert!(glob_set.is_match("build/output.log"));
assert!(glob_set.is_match("build/subdir/file.txt"));
assert!(!glob_set.is_match("src/build/output.log"));
assert!(!glob_set.is_match("packages/build"));
}
// A pattern with an interior `/` is anchored at the root.
#[test]
fn test_slash_containing_patterns_are_root_relative() {
let patterns = vec!["docs/*.md".to_string()];
let glob_set = build_glob_set(&patterns).unwrap();
assert!(glob_set.is_match("docs/readme.md"));
assert!(glob_set.is_match("docs/api.md"));
assert!(!glob_set.is_match("packages/foo/docs/readme.md"));
assert!(!glob_set.is_match("src/docs/notes.md"));
}
// A pattern without `/` matches at any depth.
#[test]
fn test_simple_patterns_match_anywhere() {
let patterns = vec!["*.bak".to_string(), "node_modules".to_string()];
let glob_set = build_glob_set(&patterns).unwrap();
assert!(glob_set.is_match("file.bak"));
assert!(glob_set.is_match("src/file.bak"));
assert!(glob_set.is_match("deep/nested/path/file.bak"));
assert!(glob_set.is_match("node_modules"));
assert!(glob_set.is_match("packages/frontend/node_modules"));
}
// Rooted dot-directories expand to their contents; file-like dotfiles do not.
#[test]
fn test_dotted_directories_expand_to_contents() {
assert_eq!(
normalize_gitignore_pattern("/.git"),
vec![".git", ".git/**"]
);
assert_eq!(
normalize_gitignore_pattern("/.sqry-cache"),
vec![".sqry-cache", ".sqry-cache/**"]
);
assert_eq!(
normalize_gitignore_pattern("/.hidden"),
vec![".hidden", ".hidden/**"]
);
assert_eq!(
normalize_gitignore_pattern("/.config"),
vec![".config", ".config/**"]
);
assert_eq!(
normalize_gitignore_pattern("/.gitconfig"),
vec![".gitconfig"]
);
assert_eq!(
normalize_gitignore_pattern("/.editorconfig"),
vec![".editorconfig"]
);
assert_eq!(
normalize_gitignore_pattern("/.gitignore"),
vec![".gitignore"]
);
assert_eq!(
normalize_gitignore_pattern("/config.json"),
vec!["config.json"]
);
assert_eq!(
normalize_gitignore_pattern("/.env.local"),
vec![".env.local"]
);
let patterns = vec!["/.git".to_string()];
let glob_set = build_glob_set(&patterns).unwrap();
assert!(glob_set.is_match(".git"));
assert!(glob_set.is_match(".git/config"));
assert!(glob_set.is_match(".git/objects/pack/abc123"));
assert!(glob_set.is_match(".git/refs/heads/main"));
assert!(!glob_set.is_match("submodule/.git"));
assert!(!glob_set.is_match("packages/sub/.git/config"));
}
// Rooted patterns match relative paths at the root but not nested copies.
#[test]
fn test_rooted_patterns_with_relative_paths() {
let mut config = ProjectConfig::default();
config.ignore.patterns = vec!["/build".to_string(), "/.git".to_string()];
assert!(config.is_ignored(Path::new("build/output.log")));
assert!(config.is_ignored(Path::new("build")));
assert!(config.is_ignored(Path::new(".git/config")));
assert!(config.is_ignored(Path::new(".git")));
assert!(!config.is_ignored(Path::new("src/build/output.log")));
assert!(!config.is_ignored(Path::new("packages/sub/build")));
assert!(!config.is_ignored(Path::new("submodule/.git")));
}
// Unrooted default patterns match both absolute and relative paths.
#[test]
fn test_unrooted_patterns_with_absolute_paths() {
let config = ProjectConfig::default();
assert!(config.is_ignored(Path::new("/home/user/project/node_modules/pkg/index.js")));
assert!(config.is_ignored(Path::new("/tmp/build/target/debug/app")));
assert!(config.is_ignored(Path::new("/var/repo/.git/objects/pack/abc")));
assert!(config.is_ignored(Path::new("node_modules/pkg/index.js")));
assert!(config.is_ignored(Path::new("target/debug/app")));
}
// Path normalization strips one leading `/` and leaves relative paths unchanged.
#[test]
fn test_normalize_path_for_matching() {
assert_eq!(
normalize_path_for_matching(Path::new("/home/user/project/src/main.rs")),
"home/user/project/src/main.rs"
);
assert_eq!(
normalize_path_for_matching(Path::new("relative/path/file.rs")),
"relative/path/file.rs"
);
assert_eq!(normalize_path_for_matching(Path::new("/build")), "build");
}
}