use super::SearchMode;
/// Lowercase file-name suffixes (leading dot included) that mark a path as source code.
///
/// Paths are lowercased before being compared against these suffixes, so every
/// entry must stay lowercase. `.sql` is deliberately listed here as well as in
/// `DATA_EXTENSIONS`.
const CODE_EXTENSIONS: &[&str] = &[
    ".rs",
    // JavaScript / TypeScript family
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    ".py", ".go", ".java",
    // C / C++ sources and headers
    ".c", ".cpp", ".cc", ".cxx", ".h", ".hpp",
    ".cs", ".rb", ".swift", ".kt", ".kts", ".scala",
    ".ex", ".exs", ".hs", ".ml", ".elm",
    ".zig", ".nim", ".v", ".sol",
    // Shell scripts
    ".sh", ".bash", ".zsh", ".fish", ".ps1",
    ".lua", ".r", ".jl", ".dart", ".cr",
    ".clj", ".cljs", ".erl", ".fs", ".fsx",
    // Also present in the data list
    ".sql",
];
/// Lowercase file-name suffixes (leading dot included) that mark a path as prose/documentation.
///
/// Paths are lowercased before being compared against these suffixes, so every
/// entry must stay lowercase.
const TEXT_EXTENSIONS: &[&str] = &[
    ".md", ".mdx", ".rst", ".txt",
    ".adoc", ".asciidoc",
    ".html", ".htm",
    ".tex", ".org", ".wiki", ".rtf",
];
/// Lowercase basename prefixes that identify conventional documentation files
/// (e.g. `README`, `LICENSE`) even when the file has no prose extension.
///
/// Compared against the lowercased basename with `starts_with`, so `ReadMe` and
/// `license-policy` both match.
const TEXT_NAME_PREFIXES: &[&str] = &[
    "readme",
    "changelog",
    "license",
    "notice",
    "contributing",
];
/// Lowercase file-name suffixes (leading dot included) that mark a path as structured data
/// or configuration.
///
/// Paths are lowercased before being compared against these suffixes, so every
/// entry must stay lowercase. `.sql` is deliberately listed here as well as in
/// `CODE_EXTENSIONS`.
const DATA_EXTENSIONS: &[&str] = &[
    // JSON flavours
    ".json", ".jsonl", ".ndjson",
    // Delimiter-separated tables
    ".csv", ".tsv", ".psv",
    ".yaml", ".yml", ".toml", ".xml",
    ".xls", ".xlsx", ".ods",
    ".parquet", ".avro", ".arrow",
    ".proto", ".graphql",
    // Also present in the code list
    ".sql",
    ".db", ".sqlite",
    ".lock",
];
/// Returns the ASCII-lowercased final component of `path`.
///
/// Only `/` is treated as a separator. A path without any `/` is returned whole
/// (lowercased); a path ending in `/` yields an empty string.
fn basename_lower(path: &str) -> String {
    let file_name = match path.rsplit_once('/') {
        Some((_, tail)) => tail,
        None => path,
    };
    file_name.to_ascii_lowercase()
}
/// Reports whether `path`, compared case-insensitively (ASCII), ends with any suffix in `exts`.
///
/// The path is lowercased but the suffixes are used as given, so entries in
/// `exts` must already be lowercase to ever match.
fn has_extension(path: &str, exts: &[&str]) -> bool {
    let normalized = path.to_ascii_lowercase();
    for &suffix in exts {
        if normalized.ends_with(suffix) {
            return true;
        }
    }
    false
}
/// Lowercase path fragments whose presence anywhere in a path keeps the file
/// out of code-mode results.
const CODE_EXCLUDED_PATH_FRAGMENTS: &[&str] = &["claude-mpm-patch/"];

/// Reports whether `path` contains, ignoring ASCII case, any fragment from
/// `CODE_EXCLUDED_PATH_FRAGMENTS`.
///
/// This is a plain substring test, so the fragment may occur at any depth.
fn has_excluded_code_path(path: &str) -> bool {
    let haystack = path.to_ascii_lowercase();
    for &fragment in CODE_EXCLUDED_PATH_FRAGMENTS {
        if haystack.contains(fragment) {
            return true;
        }
    }
    false
}
/// Decides whether a chunk coming from `chunk_file` may appear in results for `mode`.
///
/// * `Code` — the path ends in one of `CODE_EXTENSIONS` and contains no
///   excluded fragment (see `has_excluded_code_path`).
/// * `Text` — the path ends in one of `TEXT_EXTENSIONS`, or its basename starts
///   with one of `TEXT_NAME_PREFIXES` *and* the path carries no code or data
///   extension.
/// * `Data` — the path ends in one of `DATA_EXTENSIONS`.
/// * `All` — every path is accepted, including the empty string.
///
/// All comparisons ignore ASCII case.
pub(crate) fn is_allowed_for_mode(chunk_file: &str, mode: SearchMode) -> bool {
    match mode {
        SearchMode::Code => {
            !has_excluded_code_path(chunk_file) && has_extension(chunk_file, CODE_EXTENSIONS)
        }
        SearchMode::Text => {
            if has_extension(chunk_file, TEXT_EXTENSIONS) {
                return true;
            }
            // The name-prefix fallback exists for extension-less docs such as
            // `LICENSE` or `license-policy`. A recognised code/data extension is
            // stronger evidence than the name, so `src/license.rs` or
            // `readme.json` must not leak into prose results.
            if has_extension(chunk_file, CODE_EXTENSIONS)
                || has_extension(chunk_file, DATA_EXTENSIONS)
            {
                return false;
            }
            let name = basename_lower(chunk_file);
            TEXT_NAME_PREFIXES
                .iter()
                .any(|prefix| name.starts_with(prefix))
        }
        SearchMode::Data => has_extension(chunk_file, DATA_EXTENSIONS),
        SearchMode::All => true,
    }
}
/// Computes a ranking multiplier for `chunk_file` under `mode`, plus a label explaining it.
///
/// The file is placed in exactly one category and the multiplier is looked up per mode
/// (`All` shares the `Code` column):
///
/// | category | `Code` / `All` | `Text` | `Data` |
/// |----------|----------------|--------|--------|
/// | text     | 0.1            | 1.0    | 0.3    |
/// | data     | 0.2            | 0.3    | 1.0    |
/// | source   | 1.0            | 0.5    | 0.3    |
///
/// Categorisation rules:
///
/// * *text* — a `TEXT_EXTENSIONS` suffix, or a basename starting with one of
///   `TEXT_NAME_PREFIXES` when the path has no code or data extension.
/// * *source* — a `CODE_EXTENSIONS` suffix. A suffix that is also in
///   `DATA_EXTENSIONS` (currently `.sql`) counts as source only in `Code`/`All`
///   mode, matching what `is_allowed_for_mode` accepts for code mode.
/// * *data* — a `DATA_EXTENSIONS` suffix not already claimed as source.
/// * anything else is unclassified and never penalised.
///
/// # Returns
///
/// `(1.0, None)` when no penalty applies; otherwise `(multiplier, Some(reason))`
/// where `reason` is `"<category>:<lowercased basename>"`. The multiplier is
/// presumably applied to the relevance score by the caller — confirm at the call site.
pub(crate) fn doc_score_penalty(chunk_file: &str, mode: SearchMode) -> (f32, Option<String>) {
    let name = basename_lower(chunk_file);
    let has_code_ext = has_extension(chunk_file, CODE_EXTENSIONS);
    let has_data_ext = has_extension(chunk_file, DATA_EXTENSIONS);

    // A doc-like name only marks a file as prose when no recognised code/data
    // extension contradicts it; otherwise `src/license.rs` would be ranked as text.
    let has_doc_name = !has_code_ext
        && !has_data_ext
        && TEXT_NAME_PREFIXES
            .iter()
            .any(|prefix| name.starts_with(prefix));
    let is_text = has_doc_name || has_extension(chunk_file, TEXT_EXTENSIONS);

    // `All` is ranked with the same multipliers as `Code`.
    let code_ranking = matches!(mode, SearchMode::Code | SearchMode::All);

    let (category, mult): (&str, f32) = if is_text {
        let m = match mode {
            SearchMode::Code | SearchMode::All => 0.1,
            SearchMode::Text => 1.0,
            SearchMode::Data => 0.3,
        };
        ("text", m)
    } else if has_code_ext && (code_ranking || !has_data_ext) {
        // Dual-listed suffixes (code *and* data) are treated as source only when
        // ranking for code, so they are not penalised in the mode that admits them.
        let m = match mode {
            SearchMode::Code | SearchMode::All => 1.0,
            SearchMode::Text => 0.5,
            SearchMode::Data => 0.3,
        };
        ("source", m)
    } else if has_data_ext {
        let m = match mode {
            SearchMode::Code | SearchMode::All => 0.2,
            SearchMode::Text => 0.3,
            SearchMode::Data => 1.0,
        };
        ("data", m)
    } else {
        return (1.0, None);
    };

    // A neutral multiplier carries no reason, whatever the category.
    if (mult - 1.0_f32).abs() < f32::EPSILON {
        (1.0, None)
    } else {
        (mult, Some(format!("{category}:{name}")))
    }
}
/// Unit tests for the search-mode file filter and the ranking penalty.
#[cfg(test)]
mod tests {
    use super::*;

    /// Source files across many languages pass the code-mode filter.
    #[test]
    fn test_code_mode_allows_source_extensions() {
        for path in &[
            "src/main.rs",
            "src/lib/auth.ts",
            "components/Button.tsx",
            "pkg/handler.go",
            "app/views.py",
            "src/index.js",
            "src/index.mjs",
            "Main.java",
            "kernel.c",
            "engine.cpp",
            "include/header.h",
            "App.swift",
            "Module.kt",
            "lib.scala",
            "build.zig",
            "scripts/deploy.sh",
            "lib/util.lua",
            "app.rb",
            "Component.fs",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::Code),
                "{path}: expected to be allowed in code mode"
            );
        }
    }

    /// Prose and data files are filtered out of code mode.
    #[test]
    fn test_code_mode_rejects_prose_and_data() {
        for path in &[
            "README.md",
            "CHANGELOG.md",
            "docs/intro.rst",
            "guide.txt",
            "Cargo.toml",
            "package.json",
            "pnpm-lock.yaml",
            "schema.xml",
            "rates.csv",
            "LICENSE",
        ] {
            assert!(
                !is_allowed_for_mode(path, SearchMode::Code),
                "{path}: expected to be rejected in code mode"
            );
        }
    }

    /// Prose extensions pass the text-mode filter regardless of case.
    #[test]
    fn test_text_mode_allows_prose_extensions() {
        for path in &[
            "docs/intro.md",
            "docs/INTRO.MD",
            "guide.rst",
            "notes.txt",
            "manual.adoc",
            "docs/overview.html",
            "paper.tex",
            "diary.org",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::Text),
                "{path}: expected to be allowed in text mode"
            );
        }
    }

    /// Conventional doc names are accepted in text mode even without an extension.
    #[test]
    fn test_text_mode_allows_named_docs_without_extension() {
        for path in &[
            "LICENSE",
            "CHANGELOG",
            "README",
            "NOTICE",
            "CONTRIBUTING",
            "docs/CHANGELOG.rst",
            "subdir/license-policy",
            "ReadMe",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::Text),
                "{path}: expected to be allowed in text mode"
            );
        }
    }

    /// Source and data files are filtered out of text mode.
    #[test]
    fn test_text_mode_rejects_source_and_data() {
        for path in &[
            "src/main.rs",
            "src/lib/auth.ts",
            "pkg/handler.go",
            "Cargo.toml",
            "package.json",
            "config.yaml",
            "schema.xml",
            "rates.csv",
        ] {
            assert!(
                !is_allowed_for_mode(path, SearchMode::Text),
                "{path}: expected to be rejected in text mode"
            );
        }
    }

    /// Data/config extensions pass the data-mode filter regardless of case.
    #[test]
    fn test_data_mode_allows_data_extensions() {
        for path in &[
            "Cargo.toml",
            "package.json",
            "data.jsonl",
            "config.yaml",
            "config.yml",
            "schema.xml",
            "rates.csv",
            "rates.TSV",
            "Cargo.lock",
            "pnpm-lock.yaml",
            "migration.sql",
            "schema.graphql",
            "service.proto",
            "data.parquet",
            "db.sqlite",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::Data),
                "{path}: expected to be allowed in data mode"
            );
        }
    }

    /// Source and prose files are filtered out of data mode.
    #[test]
    fn test_data_mode_rejects_source_and_prose() {
        for path in &[
            "src/main.rs",
            "src/lib/auth.ts",
            "pkg/handler.go",
            "README.md",
            "CHANGELOG.md",
            "LICENSE",
            "docs/intro.rst",
            "notes.txt",
        ] {
            assert!(
                !is_allowed_for_mode(path, SearchMode::Data),
                "{path}: expected to be rejected in data mode"
            );
        }
    }

    /// `All` mode applies no filtering, even to unclassifiable or empty paths.
    #[test]
    fn test_all_mode_allows_everything() {
        for path in &[
            "src/main.rs",
            "README.md",
            "Cargo.toml",
            "LICENSE",
            "rates.csv",
            "schema.xml",
            "weird-file-no-extension",
            "",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::All),
                "{path}: expected to be allowed in all mode"
            );
        }
    }

    /// `.xml` belongs to the data category only.
    #[test]
    fn test_xml_is_data_not_text() {
        assert!(is_allowed_for_mode("schema.xml", SearchMode::Data));
        assert!(!is_allowed_for_mode("schema.xml", SearchMode::Text));
        assert!(!is_allowed_for_mode("schema.xml", SearchMode::Code));
    }

    /// `.toml` belongs to the data category only.
    #[test]
    fn test_toml_is_data_not_text() {
        assert!(is_allowed_for_mode("Cargo.toml", SearchMode::Data));
        assert!(!is_allowed_for_mode("Cargo.toml", SearchMode::Text));
        assert!(!is_allowed_for_mode("Cargo.toml", SearchMode::Code));
    }

    /// Anything under a `claude-mpm-patch/` directory is kept out of code mode.
    #[test]
    fn test_code_mode_excludes_claude_mpm_patch_paths() {
        for path in &[
            "claude-mpm-patch/src/main.py",
            "claude-mpm-patch/docs/intro.md",
            "claude-mpm-patch/CHANGELOG.md",
            "CLAUDE-MPM-PATCH/src/foo.py",
            "some/nested/claude-mpm-patch/file.py",
        ] {
            assert!(
                !is_allowed_for_mode(path, SearchMode::Code),
                "{path}: expected to be excluded from code mode"
            );
        }
    }

    /// The path exclusion is specific to code mode.
    #[test]
    fn test_code_mode_exclusion_does_not_affect_other_modes() {
        assert!(is_allowed_for_mode(
            "claude-mpm-patch/docs/intro.md",
            SearchMode::Text
        ));
        assert!(is_allowed_for_mode(
            "claude-mpm-patch/config.json",
            SearchMode::Data
        ));
        assert!(is_allowed_for_mode(
            "claude-mpm-patch/src/main.py",
            SearchMode::All
        ));
    }

    /// `.sql` is listed as both code and data, but never as text.
    #[test]
    fn test_sql_allowed_in_code_and_data() {
        for path in &[
            "migrations/0001_init.sql",
            "db/schema.SQL",
            "queries/users.sql",
        ] {
            assert!(
                is_allowed_for_mode(path, SearchMode::Code),
                "{path}: expected to be allowed in code mode"
            );
            assert!(
                is_allowed_for_mode(path, SearchMode::Data),
                "{path}: expected to be allowed in data mode"
            );
            assert!(
                !is_allowed_for_mode(path, SearchMode::Text),
                "{path}: expected to be rejected in text mode"
            );
        }
    }

    /// Asserts that `doc_score_penalty(path, mode)` yields the given multiplier and reason.
    fn assert_penalty(
        path: &str,
        mode: SearchMode,
        expected_mult: f32,
        expected_reason: Option<&str>,
    ) {
        let (mult, reason) = doc_score_penalty(path, mode);
        assert!(
            (mult - expected_mult).abs() < f32::EPSILON,
            "{path}: expected multiplier {expected_mult}, got {mult}"
        );
        assert_eq!(
            reason.as_deref(),
            expected_reason,
            "{path}: unexpected penalty reason"
        );
    }

    /// A file in its own mode's category is neither penalised nor labelled.
    #[test]
    fn test_penalty_is_neutral_for_native_category() {
        assert_penalty("src/main.rs", SearchMode::Code, 1.0, None);
        assert_penalty("README.md", SearchMode::Text, 1.0, None);
        assert_penalty("Cargo.toml", SearchMode::Data, 1.0, None);
    }

    /// Files from another category are down-weighted and labelled with
    /// `<category>:<lowercased basename>`.
    #[test]
    fn test_penalty_downweights_foreign_categories() {
        assert_penalty("docs/README.md", SearchMode::Code, 0.1, Some("text:readme.md"));
        assert_penalty("LICENSE", SearchMode::Code, 0.1, Some("text:license"));
        assert_penalty("Cargo.toml", SearchMode::Code, 0.2, Some("data:cargo.toml"));
        assert_penalty("src/main.rs", SearchMode::Text, 0.5, Some("source:main.rs"));
        assert_penalty("Cargo.toml", SearchMode::Text, 0.3, Some("data:cargo.toml"));
        assert_penalty("README.md", SearchMode::Data, 0.3, Some("text:readme.md"));
        assert_penalty("src/main.rs", SearchMode::Data, 0.3, Some("source:main.rs"));
    }

    /// `All` mode uses the same multipliers as `Code` mode.
    #[test]
    fn test_penalty_all_mode_ranks_like_code_mode() {
        assert_penalty("src/main.rs", SearchMode::All, 1.0, None);
        assert_penalty("README.md", SearchMode::All, 0.1, Some("text:readme.md"));
        assert_penalty("Cargo.toml", SearchMode::All, 0.2, Some("data:cargo.toml"));
    }

    /// Paths that match no category are never penalised, in any mode.
    #[test]
    fn test_penalty_ignores_unclassified_files() {
        for path in &["weird-file-no-extension", "assets/image.png", ""] {
            assert_penalty(path, SearchMode::Code, 1.0, None);
            assert_penalty(path, SearchMode::Text, 1.0, None);
            assert_penalty(path, SearchMode::Data, 1.0, None);
            assert_penalty(path, SearchMode::All, 1.0, None);
        }
    }
}