1use std::collections::BTreeMap;
20use std::path::Path;
21use std::sync::LazyLock;
22
23use ignore::WalkBuilder;
24use regex::Regex;
25
26use crate::error::CoreError;
27
/// Source-language families that can be scanned for environment-variable reads.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Lang {
    /// Python sources (`py`, `pyi`).
    Python,
    /// JavaScript and TypeScript sources (`js`, `jsx`, `ts`, `tsx`, `mjs`, `cjs`).
    JavaScript,
    /// Rust sources (`rs`).
    Rust,
}

impl Lang {
    /// Resolves a file extension (given without the leading dot) to its language.
    ///
    /// The lookup is an exact, case-sensitive comparison; any extension that is
    /// not listed yields `None`.
    pub fn for_extension(ext: &str) -> Option<Lang> {
        // One row per recognised extension; the table is tiny, so a linear scan is fine.
        const KNOWN: &[(&str, Lang)] = &[
            ("py", Lang::Python),
            ("pyi", Lang::Python),
            ("js", Lang::JavaScript),
            ("jsx", Lang::JavaScript),
            ("ts", Lang::JavaScript),
            ("tsx", Lang::JavaScript),
            ("mjs", Lang::JavaScript),
            ("cjs", Lang::JavaScript),
            ("rs", Lang::Rust),
        ];

        KNOWN
            .iter()
            .find(|(candidate, _)| *candidate == ext)
            .map(|&(_, lang)| lang)
    }
}
47
48static PY_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
52 vec["']"#).unwrap(),
54 Regex::new(r#"os\.environ\.get\(\s*["']([A-Z][A-Z0-9_]*)["']"#).unwrap(),
55 Regex::new(r#"os\.environ\[\s*["']([A-Z][A-Z0-9_]*)["']\s*\]"#).unwrap(),
56 ]
57});
58static JS_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
59 vec![
60 Regex::new(r#"process\.env\.([A-Z][A-Z0-9_]*)"#).unwrap(),
61 Regex::new(r#"process\.env\[\s*["']([A-Z][A-Z0-9_]*)["']\s*\]"#).unwrap(),
62 ]
63});
64static RS_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
65 vec["']"#).unwrap()]
66});
67
/// Variable names that are never reported as candidate secrets.
///
/// `detect_in_source` discards any captured name that appears in this list;
/// the comparison is an exact, case-sensitive string match.
const NEVER_SECRET: &[&str] = &[
    "PATH", "HOME", "PWD", "USER", "SHELL", "TERM", "LANG", "LC_ALL", "TMPDIR", "HOSTNAME",
];
74
75fn patterns(lang: Lang) -> &'static [Regex] {
76 match lang {
77 Lang::Python => &PY_PATTERNS,
78 Lang::JavaScript => &JS_PATTERNS,
79 Lang::Rust => &RS_PATTERNS,
80 }
81}
82
83pub fn detect_in_source(source: &str, lang: Lang) -> Vec<String> {
88 let mut hits: Vec<(usize, String)> = Vec::new();
89 for re in patterns(lang) {
90 for caps in re.captures_iter(source) {
91 let m = caps.get(1).expect("pattern has capture group 1");
92 let name = m.as_str().to_string();
93 if !NEVER_SECRET.contains(&name.as_str()) {
94 hits.push((m.start(), name));
95 }
96 }
97 }
98 hits.sort_by_key(|(pos, _)| *pos);
99 let mut seen: Vec<String> = Vec::new();
100 for (_, name) in hits {
101 if !seen.contains(&name) {
102 seen.push(name);
103 }
104 }
105 seen
106}
107
/// One proposed line of an env-refs file: a variable and the address it should point to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Proposal {
    /// Environment-variable name as it was found in the scanned source.
    pub var: String,
    /// Secret address for the variable; `scan_repo` fills it via `coordinate_for`.
    pub coordinate: String,
}
116
/// Converts `raw` into a lower-case, dash-separated slug.
///
/// Runs of ASCII letters and digits are kept (lower-cased); every run of other
/// characters becomes a single `-`, and no dash is left at either end. When
/// `raw` contains no ASCII alphanumerics at all, `fallback` is returned as is.
fn slug(raw: &str, fallback: &str) -> String {
    // Splitting on every non-alphanumeric character yields empty pieces for
    // adjacent, leading or trailing separators; dropping them collapses runs
    // and trims the ends in one go.
    let words: Vec<String> = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();

    if words.is_empty() {
        fallback.to_owned()
    } else {
        words.join("-")
    }
}
139
140fn component_for(rel_path: &Path) -> String {
144 let top = rel_path.components().next().and_then(|c| {
145 if rel_path.components().count() > 1 {
148 c.as_os_str().to_str()
149 } else {
150 None
151 }
152 });
153 match top {
154 Some(dir) => slug(dir, "app"),
155 None => "app".to_string(),
156 }
157}
158
159pub fn coordinate_for(var: &str, component: &str) -> String {
162 format!("secret:${{ENV}}/{}/{}", component, slug(var, "value"))
163}
164
165pub fn scan_repo(root: &Path) -> Result<Vec<Proposal>, CoreError> {
170 let mut found: BTreeMap<String, String> = BTreeMap::new();
172
173 let walker = WalkBuilder::new(root)
174 .hidden(true)
179 .git_ignore(true)
180 .ignore(true)
181 .require_git(false)
184 .git_global(false)
185 .build();
186
187 let mut files: Vec<(std::path::PathBuf, Lang)> = Vec::new();
189 for entry in walker.flatten() {
190 let path = entry.path();
191 if !path.is_file() {
192 continue;
193 }
194 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
195 if name == ".env" || name.starts_with(".env.") {
197 continue;
198 }
199 let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
200 continue;
201 };
202 let Some(lang) = Lang::for_extension(ext) else {
203 continue;
204 };
205 let rel = path.strip_prefix(root).unwrap_or(path).to_path_buf();
206 files.push((rel, lang));
207 }
208 files.sort_by(|a, b| a.0.cmp(&b.0));
209
210 for (rel, lang) in files {
211 let abs = root.join(&rel);
212 let Ok(bytes) = std::fs::read(&abs) else {
215 continue;
216 };
217 let source = String::from_utf8_lossy(&bytes);
218 let component = component_for(&rel);
219 for var in detect_in_source(&source, lang) {
220 found.entry(var).or_insert_with(|| component.clone());
221 }
222 }
223
224 Ok(found
225 .into_iter()
226 .map(|(var, component)| Proposal {
227 coordinate: coordinate_for(&var, &component),
228 var,
229 })
230 .collect())
231}
232
233pub fn render_env_refs(proposals: &[Proposal]) -> String {
237 let mut out = String::new();
238 out.push_str("# Proposed by `kovra scaffold` — REVIEW before use.\n");
239 out.push_str(
240 "# Holds only ADDRESSES, never values; safe to commit (replaces a plaintext .env).\n",
241 );
242 out.push_str("# `${ENV}` is substituted by `kovra run --env <e>`. Prune non-secret vars\n");
243 out.push_str("# (e.g. PORT, LOG_LEVEL) and adjust components/keys as needed.\n");
244 if proposals.is_empty() {
245 out.push_str("# (no environment-variable references detected)\n");
246 return out;
247 }
248 out.push('\n');
249 for p in proposals {
250 out.push_str(&p.var);
251 out.push('=');
252 out.push_str(&p.coordinate);
253 out.push('\n');
254 }
255 out
256}
257
// Unit tests for detection, slug/coordinate construction, rendering and the repo walk.
#[cfg(test)]
mod tests {
    use super::*;

    /// All three Python access forms are detected, in source order.
    #[test]
    fn detects_python_patterns() {
        let src = r#"
            db = os.getenv("DATABASE_URL")
            key = os.environ.get("STRIPE_KEY")
            tok = os.environ["API_TOKEN"]
            lower = os.getenv("not_a_secret")  # mixed-case: ignored
        "#;
        let found = detect_in_source(src, Lang::Python);
        assert_eq!(found, vec!["DATABASE_URL", "STRIPE_KEY", "API_TOKEN"]);
    }

    /// Dot and bracket access on `process.env` are both detected; `PORT` is
    /// not in `NEVER_SECRET`, so it is reported too.
    #[test]
    fn detects_js_ts_patterns() {
        let src = r#"
            const url = process.env.DATABASE_URL;
            const k = process.env["STRIPE_KEY"];
            const p = process.env.PORT;
        "#;
        let found = detect_in_source(src, Lang::JavaScript);
        assert_eq!(found, vec!["DATABASE_URL", "STRIPE_KEY", "PORT"]);
    }

    /// `env::var` and `env::var_os` are detected, with or without the `std::` prefix.
    #[test]
    fn detects_rust_patterns() {
        let src = r#"
            let u = std::env::var("DATABASE_URL").unwrap();
            let o = env::var_os("HOME"); // NEVER_SECRET: dropped
            let s = env::var("SECRET_KEY")?;
        "#;
        let found = detect_in_source(src, Lang::Rust);
        assert_eq!(found, vec!["DATABASE_URL", "SECRET_KEY"]);
    }

    /// A name matched several times, by different patterns, is reported once.
    #[test]
    fn dedups_within_a_source() {
        let src = r#"os.getenv("X"); os.getenv("X"); os.environ["X"]"#;
        assert_eq!(detect_in_source(src, Lang::Python), vec!["X"]);
    }

    /// The coordinate keeps the literal `${ENV}` placeholder and is accepted
    /// by the env-refs parser.
    #[test]
    fn coordinate_uses_three_segment_grammar_with_placeholder() {
        assert_eq!(
            coordinate_for("DATABASE_URL", "backend"),
            "secret:${ENV}/backend/database-url"
        );
        let parsed = crate::EnvRefs::parse("X=secret:${ENV}/backend/database-url").unwrap();
        assert_eq!(parsed.vars.len(), 1);
    }

    /// Separators collapse to single dashes; input with no alphanumerics uses the fallback.
    #[test]
    fn slug_kebab_cases_and_falls_back() {
        assert_eq!(slug("DATABASE_URL", "x"), "database-url");
        assert_eq!(slug("___", "fallback"), "fallback");
        assert_eq!(slug("Mixed.Name", "x"), "mixed-name");
    }

    /// Nested files take their top-level directory; root-level files fall back to `app`.
    #[test]
    fn component_is_top_dir_or_app() {
        assert_eq!(component_for(Path::new("backend/db.py")), "backend");
        assert_eq!(component_for(Path::new("main.py")), "app");
        assert_eq!(component_for(Path::new("api/v1/handler.ts")), "api");
    }

    /// The rendered body, comment header included, parses back into the same
    /// number of entries.
    #[test]
    fn render_is_valid_env_refs_and_round_trips() {
        let proposals = vec![
            Proposal {
                var: "DATABASE_URL".into(),
                coordinate: "secret:${ENV}/backend/database-url".into(),
            },
            Proposal {
                var: "STRIPE_KEY".into(),
                coordinate: "secret:${ENV}/backend/stripe-key".into(),
            },
        ];
        let body = render_env_refs(&proposals);
        let parsed = crate::EnvRefs::parse(&body).unwrap();
        assert_eq!(parsed.vars.len(), 2);
        assert!(body.contains("DATABASE_URL=secret:${ENV}/backend/database-url"));
    }

    /// End-to-end scan: sources are read, components are derived from paths,
    /// and the value stored in `.env` never reaches the output.
    #[test]
    fn scan_repo_walks_sources_and_skips_env_files() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::create_dir_all(root.join("backend")).unwrap();
        std::fs::write(
            root.join("backend/app.py"),
            r#"db = os.getenv("DATABASE_URL")"#,
        )
        .unwrap();
        std::fs::write(root.join("web.ts"), r#"const k = process.env.STRIPE_KEY;"#).unwrap();
        // A plaintext `.env` holding a real value; the scan must not read it.
        std::fs::write(root.join(".env"), "DATABASE_URL=super-secret-value\n").unwrap();

        let proposals = scan_repo(root).unwrap();
        // Proposals come back sorted by variable name.
        let vars: Vec<&str> = proposals.iter().map(|p| p.var.as_str()).collect();
        assert_eq!(vars, vec!["DATABASE_URL", "STRIPE_KEY"]);
        let by_var: std::collections::HashMap<_, _> = proposals
            .iter()
            .map(|p| (p.var.as_str(), p.coordinate.as_str()))
            .collect();
        // `backend/app.py` maps to component `backend`; root-level `web.ts` maps to `app`.
        assert_eq!(by_var["DATABASE_URL"], "secret:${ENV}/backend/database-url");
        assert_eq!(by_var["STRIPE_KEY"], "secret:${ENV}/app/stripe-key");
        let body = render_env_refs(&proposals);
        assert!(!body.contains("super-secret-value"));
    }

    /// Hidden directories and gitignored trees contribute no proposals, even
    /// though the temp dir is not a git checkout.
    #[test]
    fn scan_repo_skips_hidden_and_vendored_dirs() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        // Hidden tree: skipped by the walker's hidden-entry filter.
        std::fs::create_dir_all(root.join(".venv/lib")).unwrap();
        std::fs::write(root.join(".venv/lib/dep.py"), r#"os.getenv("VENDOR_KEY")"#).unwrap();
        // Gitignored tree: skipped because `.gitignore` is honoured.
        std::fs::write(root.join(".gitignore"), "node_modules/\n").unwrap();
        std::fs::create_dir_all(root.join("node_modules/pkg")).unwrap();
        std::fs::write(
            root.join("node_modules/pkg/i.ts"),
            r#"process.env.DEP_TOKEN"#,
        )
        .unwrap();
        // The only file that should be scanned.
        std::fs::write(root.join("app.py"), r#"os.getenv("APP_KEY")"#).unwrap();

        let vars: Vec<String> = scan_repo(root)
            .unwrap()
            .into_iter()
            .map(|p| p.var)
            .collect();
        assert_eq!(
            vars,
            vec!["APP_KEY"],
            "hidden (.venv) and gitignored (node_modules) trees must be skipped"
        );
    }
}