use std::collections::BTreeMap;
use std::path::Path;
use std::sync::LazyLock;
use ignore::WalkBuilder;
use regex::Regex;
use crate::error::CoreError;
/// Source languages whose environment-variable reads the scanner recognises.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Lang {
    /// Python sources (`py`, `pyi`).
    Python,
    /// JavaScript and TypeScript sources (`js`, `jsx`, `ts`, `tsx`, `mjs`, `cjs`).
    JavaScript,
    /// Rust sources (`rs`).
    Rust,
}

impl Lang {
    /// Maps a file extension (without the leading dot) to its [`Lang`].
    ///
    /// The comparison is exact and case-sensitive; any extension that is not
    /// listed yields `None`.
    pub fn for_extension(ext: &str) -> Option<Lang> {
        const PYTHON: &[&str] = &["py", "pyi"];
        const JAVASCRIPT: &[&str] = &["js", "jsx", "ts", "tsx", "mjs", "cjs"];

        if PYTHON.contains(&ext) {
            Some(Lang::Python)
        } else if JAVASCRIPT.contains(&ext) {
            Some(Lang::JavaScript)
        } else if ext == "rs" {
            Some(Lang::Rust)
        } else {
            None
        }
    }
}
static PY_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec["']"#).unwrap(),
Regex::new(r#"os\.environ\.get\(\s*["']([A-Z][A-Z0-9_]*)["']"#).unwrap(),
Regex::new(r#"os\.environ\[\s*["']([A-Z][A-Z0-9_]*)["']\s*\]"#).unwrap(),
]
});
/// Regexes that recognise environment-variable reads in JavaScript/TypeScript
/// source.
///
/// Every pattern exposes the variable name as capture group 1 and only accepts
/// names of the form `[A-Z][A-Z0-9_]*`.
static JS_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    vec![
        // `process.env.NAME` — the trailing `\b` requires the whole identifier
        // to be upper-case. Without it `process.env.DATABASE_url` would be
        // reported as the truncated name `DATABASE_` instead of being ignored.
        Regex::new(r#"process\.env\.([A-Z][A-Z0-9_]*)\b"#).unwrap(),
        // `process.env["NAME"]` — the closing quote already anchors the name.
        Regex::new(r#"process\.env\[\s*["']([A-Z][A-Z0-9_]*)["']\s*\]"#).unwrap(),
    ]
});
static RS_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec["']"#).unwrap()]
});
/// Variable names that are dropped from detection results even when a pattern
/// matches them.
const NEVER_SECRET: &[&str] = &[
    "PATH",
    "HOME",
    "PWD",
    "USER",
    "SHELL",
    "TERM",
    "LANG",
    "LC_ALL",
    "TMPDIR",
    "HOSTNAME",
];
fn patterns(lang: Lang) -> &'static [Regex] {
match lang {
Lang::Python => &PY_PATTERNS,
Lang::JavaScript => &JS_PATTERNS,
Lang::Rust => &RS_PATTERNS,
}
}
/// Finds the environment-variable names referenced in `source`.
///
/// Every regex registered for `lang` is run over the text; capture group 1 of
/// each match is the variable name. Names listed in `NEVER_SECRET` are
/// dropped. The result is ordered by the byte offset of each name's first
/// occurrence and contains every name at most once.
///
/// # Panics
///
/// Panics if a registered pattern has no capture group 1.
pub fn detect_in_source(source: &str, lang: Lang) -> Vec<String> {
    let mut occurrences: Vec<(usize, &str)> = patterns(lang)
        .iter()
        .flat_map(|pattern| pattern.captures_iter(source))
        .map(|caps| caps.get(1).expect("pattern has capture group 1"))
        .filter(|found| !NEVER_SECRET.contains(&found.as_str()))
        .map(|found| (found.start(), found.as_str()))
        .collect();

    // Stable sort: matches are interleaved across patterns, so order them by
    // where they appear in the text before de-duplicating.
    occurrences.sort_by_key(|&(offset, _)| offset);

    let mut names: Vec<String> = Vec::new();
    for (_, name) in occurrences {
        let already_listed = names.iter().any(|known| known.as_str() == name);
        if !already_listed {
            names.push(name.to_owned());
        }
    }
    names
}
/// A proposed mapping from an environment variable to a secret coordinate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Proposal {
    /// Name of the environment variable as it appears in source.
    pub var: String,
    /// Coordinate string proposed for that variable.
    pub coordinate: String,
}
/// Converts `raw` into a lowercase, dash-separated slug.
///
/// Runs of ASCII alphanumerics are kept (lower-cased); every run of other
/// characters acts as a single separator, and separators at either end are
/// discarded. When nothing alphanumeric remains, `fallback` is returned.
fn slug(raw: &str, fallback: &str) -> String {
    let words: Vec<String> = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();

    if words.is_empty() {
        fallback.to_string()
    } else {
        words.join("-")
    }
}
/// Derives a component name from a path.
///
/// When the path has more than one component and the first one is valid
/// UTF-8, the slug of that first component is used (falling back to `"app"`
/// if the slug would be empty). In every other case the component is `"app"`.
fn component_for(rel_path: &Path) -> String {
    let mut parts = rel_path.components();
    let leading = parts.next();
    let has_more = parts.next().is_some();

    match leading {
        Some(first) if has_more => match first.as_os_str().to_str() {
            Some(dir) => slug(dir, "app"),
            None => "app".to_string(),
        },
        _ => "app".to_string(),
    }
}
/// Builds the coordinate string `secret:${ENV}/<component>/<key>` for `var`.
///
/// `component` is inserted verbatim; the key is the slug of `var`, or
/// `"value"` when that slug would be empty. The `${ENV}` placeholder is
/// emitted literally.
pub fn coordinate_for(var: &str, component: &str) -> String {
    let key = slug(var, "value");
    format!("secret:${{ENV}}/{component}/{key}")
}
/// Walks `root` and proposes one coordinate per detected environment variable.
///
/// Files are visited through an `ignore::WalkBuilder` configured below. Only
/// regular files whose extension maps to a [`Lang`] are read; files named
/// `.env` or `.env.*` are never read. Walk errors and unreadable files are
/// skipped silently. Source files are processed in sorted relative-path
/// order, and a variable keeps the component of the first file in which it
/// was seen. The returned proposals are ordered by variable name.
///
/// # Errors
///
/// The current implementation always returns `Ok`.
pub fn scan_repo(root: &Path) -> Result<Vec<Proposal>, CoreError> {
    let entries = WalkBuilder::new(root)
        .hidden(true)
        .git_ignore(true)
        .ignore(true)
        .require_git(false)
        .git_global(false)
        .build();

    // Phase 1: collect (relative path, language) for every candidate file.
    let mut sources: Vec<(std::path::PathBuf, Lang)> = entries
        .flatten()
        .filter_map(|entry| {
            let path = entry.path();
            if !path.is_file() {
                return None;
            }
            let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if file_name == ".env" || file_name.starts_with(".env.") {
                return None;
            }
            let lang = path
                .extension()
                .and_then(|e| e.to_str())
                .and_then(Lang::for_extension)?;
            let relative = path.strip_prefix(root).unwrap_or(path).to_path_buf();
            Some((relative, lang))
        })
        .collect();
    // Sorting makes the "first file wins" rule below independent of walk order.
    sources.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));

    // Phase 2: scan each file and remember the first component per variable.
    let mut component_by_var: BTreeMap<String, String> = BTreeMap::new();
    for (relative, lang) in &sources {
        let Ok(bytes) = std::fs::read(root.join(relative)) else {
            continue;
        };
        let text = String::from_utf8_lossy(&bytes);
        let component = component_for(relative);
        for var in detect_in_source(&text, *lang) {
            component_by_var
                .entry(var)
                .or_insert_with(|| component.clone());
        }
    }

    let proposals = component_by_var
        .into_iter()
        .map(|(var, component)| Proposal {
            coordinate: coordinate_for(&var, &component),
            var,
        })
        .collect();
    Ok(proposals)
}
pub fn render_env_refs(proposals: &[Proposal]) -> String {
let mut out = String::new();
out.push_str("# Proposed by `kovra scaffold` — REVIEW before use.\n");
out.push_str(
"# Holds only ADDRESSES, never values; safe to commit (replaces a plaintext .env).\n",
);
out.push_str("# `${ENV}` is substituted by `kovra run --env <e>`. Prune non-secret vars\n");
out.push_str("# (e.g. PORT, LOG_LEVEL) and adjust components/keys as needed.\n");
if proposals.is_empty() {
out.push_str("# (no environment-variable references detected)\n");
return out;
}
out.push('\n');
for p in proposals {
out.push_str(&p.var);
out.push('=');
out.push_str(&p.coordinate);
out.push('\n');
}
out
}
// Unit tests for pattern detection, slug/coordinate construction, rendering,
// and the repository walk.
#[cfg(test)]
mod tests {
use super::*;
// All three Python access forms are reported in source order; the lowercase
// name is expected to be ignored.
#[test]
fn detects_python_patterns() {
let src = r#"
db = os.getenv("DATABASE_URL")
key = os.environ.get("STRIPE_KEY")
tok = os.environ["API_TOKEN"]
lower = os.getenv("not_a_secret") # mixed-case: ignored
"#;
let found = detect_in_source(src, Lang::Python);
assert_eq!(found, vec!["DATABASE_URL", "STRIPE_KEY", "API_TOKEN"]);
}
// Both dot access and bracket access on `process.env` are reported; PORT is
// not filtered out because it is not in NEVER_SECRET.
#[test]
fn detects_js_ts_patterns() {
let src = r#"
const url = process.env.DATABASE_URL;
const k = process.env["STRIPE_KEY"];
const p = process.env.PORT;
"#;
let found = detect_in_source(src, Lang::JavaScript);
assert_eq!(found, vec!["DATABASE_URL", "STRIPE_KEY", "PORT"]);
}
// `env::var` is reported with and without the `std::` prefix; the `var_os`
// read of HOME must be absent from the result.
#[test]
fn detects_rust_patterns() {
let src = r#"
let u = std::env::var("DATABASE_URL").unwrap();
let o = env::var_os("HOME"); // NEVER_SECRET: dropped
let s = env::var("SECRET_KEY")?;
"#;
let found = detect_in_source(src, Lang::Rust);
assert_eq!(found, vec!["DATABASE_URL", "SECRET_KEY"]);
}
// The same name matched several times (and by different patterns) is
// reported once.
#[test]
fn dedups_within_a_source() {
let src = r#"os.getenv("X"); os.getenv("X"); os.environ["X"]"#;
assert_eq!(detect_in_source(src, Lang::Python), vec!["X"]);
}
// Checks the exact coordinate text, then that `crate::EnvRefs::parse` accepts
// a line using that coordinate and yields one variable.
#[test]
fn coordinate_uses_three_segment_grammar_with_placeholder() {
assert_eq!(
coordinate_for("DATABASE_URL", "backend"),
"secret:${ENV}/backend/database-url"
);
let parsed = crate::EnvRefs::parse("X=secret:${ENV}/backend/database-url").unwrap();
assert_eq!(parsed.vars.len(), 1);
}
// Separators become single dashes, output is lower-cased, and an input with
// no alphanumerics yields the fallback.
#[test]
fn slug_kebab_cases_and_falls_back() {
assert_eq!(slug("DATABASE_URL", "x"), "database-url");
assert_eq!(slug("___", "fallback"), "fallback");
assert_eq!(slug("Mixed.Name", "x"), "mixed-name");
}
// A file directly at the root maps to "app"; otherwise the first path
// component is used, regardless of nesting depth.
#[test]
fn component_is_top_dir_or_app() {
assert_eq!(component_for(Path::new("backend/db.py")), "backend");
assert_eq!(component_for(Path::new("main.py")), "app");
assert_eq!(component_for(Path::new("api/v1/handler.ts")), "api");
}
// The rendered text (header comments included) must be accepted by
// `crate::EnvRefs::parse` and contain the expected assignment line.
#[test]
fn render_is_valid_env_refs_and_round_trips() {
let proposals = vec![
Proposal {
var: "DATABASE_URL".into(),
coordinate: "secret:${ENV}/backend/database-url".into(),
},
Proposal {
var: "STRIPE_KEY".into(),
coordinate: "secret:${ENV}/backend/stripe-key".into(),
},
];
let body = render_env_refs(&proposals);
let parsed = crate::EnvRefs::parse(&body).unwrap();
assert_eq!(parsed.vars.len(), 2);
assert!(body.contains("DATABASE_URL=secret:${ENV}/backend/database-url"));
}
// End-to-end scan of a temporary tree: a nested Python file, a root-level
// TypeScript file, and a `.env` file whose value must never reach the output.
#[test]
fn scan_repo_walks_sources_and_skips_env_files() {
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
std::fs::create_dir_all(root.join("backend")).unwrap();
std::fs::write(
root.join("backend/app.py"),
r#"db = os.getenv("DATABASE_URL")"#,
)
.unwrap();
std::fs::write(root.join("web.ts"), r#"const k = process.env.STRIPE_KEY;"#).unwrap();
std::fs::write(root.join(".env"), "DATABASE_URL=super-secret-value\n").unwrap();
let proposals = scan_repo(root).unwrap();
// Proposals come back ordered by variable name.
let vars: Vec<&str> = proposals.iter().map(|p| p.var.as_str()).collect();
assert_eq!(vars, vec!["DATABASE_URL", "STRIPE_KEY"]);
let by_var: std::collections::HashMap<_, _> = proposals
.iter()
.map(|p| (p.var.as_str(), p.coordinate.as_str()))
.collect();
// The component comes from the top-level directory, or "app" at the root.
assert_eq!(by_var["DATABASE_URL"], "secret:${ENV}/backend/database-url");
assert_eq!(by_var["STRIPE_KEY"], "secret:${ENV}/app/stripe-key");
let body = render_env_refs(&proposals);
assert!(!body.contains("super-secret-value"));
}
// Files under a hidden directory and under a path listed in `.gitignore`
// must not contribute variables.
#[test]
fn scan_repo_skips_hidden_and_vendored_dirs() {
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
std::fs::create_dir_all(root.join(".venv/lib")).unwrap();
std::fs::write(root.join(".venv/lib/dep.py"), r#"os.getenv("VENDOR_KEY")"#).unwrap();
std::fs::write(root.join(".gitignore"), "node_modules/\n").unwrap();
std::fs::create_dir_all(root.join("node_modules/pkg")).unwrap();
std::fs::write(
root.join("node_modules/pkg/i.ts"),
r#"process.env.DEP_TOKEN"#,
)
.unwrap();
std::fs::write(root.join("app.py"), r#"os.getenv("APP_KEY")"#).unwrap();
let vars: Vec<String> = scan_repo(root)
.unwrap()
.into_iter()
.map(|p| p.var)
.collect();
assert_eq!(
vars,
vec!["APP_KEY"],
"hidden (.venv) and gitignored (node_modules) trees must be skipped"
);
}
}