1use crate::report::SanitizeReport;
41use std::fmt::Write as FmtWrite;
42use std::fs;
43use std::path::PathBuf;
44
/// One inlined prompt entry: a display label paired with the raw bytes of the
/// content to embed.
///
/// `format_llm_prompt` lists the label in its file manifest and uses it as the
/// `name` attribute of the `<content>` block; the bytes are decoded as UTF-8
/// with lossy replacement before being embedded.
pub type LlmEntry = (String, Vec<u8>);
47
/// One by-reference prompt entry: a display label paired with a filesystem
/// path.
///
/// `format_llm_prompt_reference` prints each entry as `label → path` instead
/// of inlining any file content.
pub type LlmPathEntry = (String, PathBuf);
54
/// Shared preamble describing how the content was sanitized and how sanitized
/// values must be treated.
///
/// `resolve_llm_template` substitutes this text for the `{preamble}`
/// placeholder in each built-in template. The leading `\` after the opening
/// quote suppresses the first newline, so the text starts at
/// `Content sanitized:` and ends with a single trailing newline.
pub const PROMPT_PREAMBLE: &str = "\
Content sanitized:
- Structured fields (passwords, tokens, API keys) → __SANITIZED-<hash>__
- Typed values (emails, IPs, hostnames, UUIDs) → same-format/length substitute

Same original = same replacement per run. Repeated values safe to correlate.

No inferring originals. Critical value missing? Ask specific clarifying question. Use sanitized forms only.
";
66
/// Built-in template selected by the name `"troubleshoot"` in
/// `resolve_llm_template`.
///
/// Asks for root cause, event sequence and remediation. The `{preamble}`
/// placeholder is replaced with `PROMPT_PREAMBLE` when the template is
/// resolved; the text ends with a blank line so appended sections start
/// cleanly.
pub const TEMPLATE_TROUBLESHOOT: &str = "\
SRE troubleshooting incident. Logs/output sanitized.

{preamble}
Provide:
1. Root cause — specific triggering failure
2. Event sequence — timeline to failure
3. Remediation — concrete fix + prevent recurrence

Data insufficient? State what info needed and why. No speculating on sanitized values.

";
80
/// Built-in template selected by the name `"review-config"` in
/// `resolve_llm_template`.
///
/// Asks for misconfigurations, security concerns, best-practice violations
/// and credential placement. The `{preamble}` placeholder is replaced with
/// `PROMPT_PREAMBLE` when the template is resolved.
pub const TEMPLATE_REVIEW_CONFIG: &str = "\
Systems engineer reviewing sanitized config.

{preamble}
Identify:
1. Misconfigurations — invalid/inconsistent settings causing failures
2. Security concerns — exposed services, permissive rules, weak/default settings
3. Best practice violations — deprecated options, missing fields, non-standard patterns
4. Credential placement — flag secret locations; presence/placement = hardcoding risk

Cannot assess redacted credential strength. Risk depends on actual value? Flag + ask.

";
95
/// Built-in template selected by the name `"review-security"` in
/// `resolve_llm_template`.
///
/// Requests a six-point security posture assessment, including an explicit
/// list of findings that cannot be assessed without original values. The
/// `{preamble}` placeholder is replaced with `PROMPT_PREAMBLE` when the
/// template is resolved.
pub const TEMPLATE_REVIEW_SECURITY: &str = "\
Security engineer: posture review of sanitized config/logs.

{preamble}
Assess and report:
1. Authentication/authz — weak configs, missing enforcement, privilege issues
2. Network exposure — ports/services/interfaces needing restriction
3. Encryption/TLS — weak ciphers, outdated protocols, insecure defaults
4. Hardcoded secrets — flag credential locations; presence/placement = finding
5. Known CVEs — tie visible version strings to known weaknesses
6. Cannot assess — list findings needing original values (e.g. password strength, token format)

Cite field/file/line per finding. No guessing sanitized values. Need actual value? Ask specifically.

";
112
113pub fn resolve_llm_template(template_name: &str) -> Result<String, String> {
123 match template_name {
124 "troubleshoot" => Ok(TEMPLATE_TROUBLESHOOT.replace("{preamble}", PROMPT_PREAMBLE)),
125 "review-config" => Ok(TEMPLATE_REVIEW_CONFIG.replace("{preamble}", PROMPT_PREAMBLE)),
126 "review-security" => Ok(TEMPLATE_REVIEW_SECURITY.replace("{preamble}", PROMPT_PREAMBLE)),
127 path => fs::read_to_string(path)
128 .map_err(|e| format!("failed to read LLM template '{}': {e}", path)),
129 }
130}
131
132pub fn format_llm_prompt(
146 template_name: &str,
147 entries: &[LlmEntry],
148 report: Option<&SanitizeReport>,
149) -> Result<String, String> {
150 let mut out = resolve_llm_template(template_name)?;
151
152 if let Some(r) = report {
153 let total_replacements: u64 = r.files.iter().map(|f| f.replacements).sum();
154 write!(
155 out,
156 "## Sanitization Summary\n\
157 - Files processed: {}\n\
158 - Total replacements: {total_replacements}\n\n",
159 r.files.len()
160 )
161 .unwrap();
162 }
163
164 if !entries.is_empty() {
165 out.push_str("## Files Analyzed\n");
166 for (label, _) in entries {
167 writeln!(out, "- {label}").unwrap();
168 }
169 out.push('\n');
170 }
171
172 for (label, bytes) in entries {
173 let content = String::from_utf8_lossy(bytes);
174 write!(
175 out,
176 "<content name=\"{}\">\n{}\n</content>\n\n",
177 label, content
178 )
179 .unwrap();
180 }
181
182 if let Some(r) = report {
183 append_notable_events(&mut out, r);
184 }
185
186 Ok(out)
187}
188
189pub fn format_llm_prompt_reference(
199 template_name: &str,
200 entries: &[LlmPathEntry],
201 report: Option<&SanitizeReport>,
202) -> Result<String, String> {
203 let mut out = resolve_llm_template(template_name)?;
204
205 if let Some(r) = report {
206 let total_replacements: u64 = r.files.iter().map(|f| f.replacements).sum();
207 write!(
208 out,
209 "## Sanitization Summary\n\
210 - Files processed: {}\n\
211 - Total replacements: {total_replacements}\n\n",
212 r.files.len()
213 )
214 .unwrap();
215 }
216
217 if !entries.is_empty() {
218 out.push_str("## Sanitized Files\n");
219 out.push_str("Read each path below to review the sanitized content:\n\n");
220 for (label, out_path) in entries {
221 writeln!(out, "- {} → {}", label, out_path.display()).unwrap();
222 }
223 out.push('\n');
224 }
225
226 if let Some(r) = report {
227 append_notable_events(&mut out, r);
228 }
229
230 Ok(out)
231}
232
233fn append_notable_events(out: &mut String, report: &SanitizeReport) {
236 let notable: Vec<_> = report
237 .files
238 .iter()
239 .filter_map(|f| f.log_context.as_ref().map(|ctx| (&f.path, ctx)))
240 .filter(|(_, ctx)| ctx.match_count > 0)
241 .collect();
242
243 if notable.is_empty() {
244 return;
245 }
246
247 out.push_str("<notable_events>\n");
248 let mut any_truncated = false;
249 for (path, ctx) in ¬able {
250 writeln!(out, "# {path}").unwrap();
251 for m in &ctx.matches {
252 for line in &m.before {
253 writeln!(out, " {line}").unwrap();
254 }
255 writeln!(out, ">>> [{}] {}", m.keyword, m.line).unwrap();
256 for line in &m.after {
257 writeln!(out, " {line}").unwrap();
258 }
259 out.push('\n');
260 }
261 if ctx.truncated {
262 any_truncated = true;
263 }
264 }
265 if any_truncated {
266 out.push_str(
267 "(notable events truncated — use --context-lines or --report for full context)\n",
268 );
269 }
270 out.push_str("</notable_events>\n");
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use crate::log_context::{extract_context, LogContextConfig};
    use crate::report::{FileReport, ReportBuilder, ReportMetadata};
    use crate::scanner::ScanStats;
    use std::fs;
    use tempfile::tempdir;

    /// Fixed metadata used by every report constructed in these tests.
    fn test_metadata() -> ReportMetadata {
        ReportMetadata {
            version: "0.0.0".into(),
            timestamp: "test".into(),
            deterministic: false,
            dry_run: false,
            strict: false,
            chunk_size: 1024,
            threads: None,
            secrets_file: None,
        }
    }

    /// Builds a report with a single `test.log` record whose match and
    /// replacement counts both equal `replacements`.
    fn make_test_report(replacements: u64) -> SanitizeReport {
        let stats = ScanStats {
            matches_found: replacements,
            replacements_applied: replacements,
            ..Default::default()
        };
        let builder = ReportBuilder::new(test_metadata());
        builder.record_file(FileReport::from_scan_stats("test.log", &stats, "scanner"));
        builder.finish()
    }

    /// The built-in troubleshoot template carries the preamble and its key asks.
    #[test]
    fn troubleshoot_embeds_preamble_and_instructions() {
        let template = resolve_llm_template("troubleshoot").unwrap();
        let expectations = [
            ("sanitized", "preamble should be embedded"),
            ("Root cause", "should request root cause analysis"),
            ("Remediation", "should request remediation steps"),
            (
                "clarifying question",
                "should instruct LLM to ask rather than guess",
            ),
        ];
        for (needle, why) in expectations {
            assert!(template.contains(needle), "{why}");
        }
    }

    /// The built-in config-review template carries the preamble and its key asks.
    #[test]
    fn review_config_embeds_preamble_and_instructions() {
        let template = resolve_llm_template("review-config").unwrap();
        let expectations = [
            ("sanitized", "preamble should be embedded"),
            ("Misconfigurations", "should request misconfiguration review"),
            ("Security concerns", "should request security review"),
            (
                "clarifying question",
                "should instruct LLM to ask rather than guess",
            ),
        ];
        for (needle, why) in expectations {
            assert!(template.contains(needle), "{why}");
        }
    }

    /// The built-in security-review template carries the preamble and its key asks.
    #[test]
    fn review_security_embeds_preamble_and_instructions() {
        let template = resolve_llm_template("review-security").unwrap();
        let expectations = [
            ("sanitized", "preamble should be embedded"),
            ("Authentication", "should cover auth review"),
            ("Encryption", "should cover TLS/crypto review"),
            ("Hardcoded", "should flag hardcoded credential placement"),
            (
                "clarifying question",
                "should instruct LLM to ask rather than guess",
            ),
        ];
        for (needle, why) in expectations {
            assert!(template.contains(needle), "{why}");
        }
    }

    /// An unknown name is treated as a path; a missing file yields an error.
    #[test]
    fn nonexistent_path_returns_error() {
        let err = resolve_llm_template("/nonexistent/template.txt").unwrap_err();
        assert!(err.contains("failed to read"), "got: {err}");
    }

    /// A custom template file is returned verbatim.
    #[test]
    fn custom_file_returns_raw_content() {
        let dir = tempdir().unwrap();
        let template_path = dir.path().join("custom.txt");
        fs::write(&template_path, "MY CUSTOM INSTRUCTIONS\n").unwrap();
        let loaded = resolve_llm_template(template_path.to_str().unwrap()).unwrap();
        assert_eq!(loaded, "MY CUSTOM INSTRUCTIONS\n");
    }

    /// Each entry is wrapped in a named `<content>` block.
    #[test]
    fn prompt_includes_content_block() {
        let entries: Vec<LlmEntry> = vec![("app.log".to_string(), b"sanitized line\n".to_vec())];
        let prompt = format_llm_prompt("troubleshoot", &entries, None).unwrap();
        for needle in ["<content name=\"app.log\">", "sanitized line", "</content>"] {
            assert!(prompt.contains(needle), "got:\n{prompt}");
        }
    }

    /// A supplied report produces the summary section with aggregate counts.
    #[test]
    fn prompt_includes_sanitization_summary() {
        let report = make_test_report(7);
        let entries: Vec<LlmEntry> = Vec::new();
        let prompt = format_llm_prompt("troubleshoot", &entries, Some(&report)).unwrap();
        for needle in [
            "## Sanitization Summary",
            "Files processed: 1",
            "Total replacements: 7",
        ] {
            assert!(prompt.contains(needle), "got:\n{prompt}");
        }
    }

    /// A file whose log context has matches shows up in `<notable_events>`.
    #[test]
    fn prompt_includes_notable_events_when_present() {
        let builder = ReportBuilder::new(test_metadata());
        builder.record_file(FileReport::from_scan_stats(
            "app.log",
            &ScanStats::default(),
            "scanner",
        ));
        let ctx = extract_context(
            "INFO start\nERROR disk full\nINFO done",
            &LogContextConfig::new().with_context_lines(1),
        );
        builder.set_file_log_context("app.log", ctx);
        let report = builder.finish();

        let entries: Vec<LlmEntry> = Vec::new();
        let prompt = format_llm_prompt("troubleshoot", &entries, Some(&report)).unwrap();
        for needle in [
            "<notable_events>",
            "# app.log",
            ">>> [error]",
            "ERROR disk full",
            "</notable_events>",
        ] {
            assert!(prompt.contains(needle), "got:\n{prompt}");
        }
    }

    /// Without any log-context matches the section is left out entirely.
    #[test]
    fn prompt_omits_notable_events_when_no_matches() {
        let report = make_test_report(0);
        let entries: Vec<LlmEntry> = Vec::new();
        let prompt = format_llm_prompt("troubleshoot", &entries, Some(&report)).unwrap();
        let has_section = prompt.contains("<notable_events>");
        assert!(!has_section, "should omit section when no keyword matches");
    }

    /// Entries are emitted in the order the caller supplied them.
    #[test]
    fn prompt_multiple_content_blocks_in_order() {
        let entries: Vec<LlmEntry> = vec![
            ("first.log".to_string(), b"first content".to_vec()),
            ("second.log".to_string(), b"second content".to_vec()),
        ];
        let prompt = format_llm_prompt("troubleshoot", &entries, None).unwrap();
        let position_of = |needle: &str| prompt.find(needle).unwrap();
        let first_pos = position_of("first.log");
        let second_pos = position_of("second.log");
        assert!(
            first_pos < second_pos,
            "entries should appear in insertion order"
        );
    }

    /// The manifest lists every label and comes before the content blocks.
    #[test]
    fn prompt_includes_files_analyzed_manifest() {
        let entries: Vec<LlmEntry> = vec![
            ("/abs/app.log".to_string(), b"sanitized line\n".to_vec()),
            (
                "/abs/config.yaml".to_string(),
                b"key: __SANITIZED__\n".to_vec(),
            ),
        ];
        let prompt = format_llm_prompt("troubleshoot", &entries, None).unwrap();
        for needle in ["## Files Analyzed", "- /abs/app.log", "- /abs/config.yaml"] {
            assert!(prompt.contains(needle), "got:\n{prompt}");
        }
        let manifest_pos = prompt.find("## Files Analyzed").unwrap();
        let content_pos = prompt.find("<content name=").unwrap();
        assert!(
            manifest_pos < content_pos,
            "manifest should precede content blocks"
        );
    }

    /// With no entries there is no manifest heading.
    #[test]
    fn prompt_omits_files_analyzed_when_no_entries() {
        let entries: Vec<LlmEntry> = Vec::new();
        let prompt = format_llm_prompt("troubleshoot", &entries, None).unwrap();
        let has_manifest = prompt.contains("## Files Analyzed");
        assert!(!has_manifest, "should omit manifest when no entries");
    }

    /// Reference mode lists label and output path, and never inlines content.
    #[test]
    fn reference_prompt_lists_output_paths() {
        let dir = tempdir().unwrap();
        let out1 = dir.path().join("app.log.sanitized");
        let out2 = dir.path().join("config.yaml.sanitized");
        let entries: Vec<LlmPathEntry> = vec![
            ("/abs/input/app.log".to_string(), out1.clone()),
            ("/abs/input/config.yaml".to_string(), out2.clone()),
        ];
        let prompt = format_llm_prompt_reference("troubleshoot", &entries, None).unwrap();
        let expected_path = out1.display().to_string();
        assert!(prompt.contains("## Sanitized Files"), "got:\n{prompt}");
        assert!(
            prompt.contains("/abs/input/app.log"),
            "should include input label; got:\n{prompt}"
        );
        assert!(
            prompt.contains(&expected_path),
            "should include output path; got:\n{prompt}"
        );
        assert!(
            !prompt.contains("<content"),
            "reference mode must not inline content"
        );
    }

    /// Reference mode also emits the summary section when a report is supplied.
    #[test]
    fn reference_prompt_includes_sanitization_summary() {
        let report = make_test_report(12);
        let entries: Vec<LlmPathEntry> = Vec::new();
        let prompt = format_llm_prompt_reference("troubleshoot", &entries, Some(&report)).unwrap();
        for needle in ["## Sanitization Summary", "Total replacements: 12"] {
            assert!(prompt.contains(needle), "got:\n{prompt}");
        }
    }
}
521}