use anyhow::{Context, Result, bail};
use chrono::Utc;
use futures_util::{StreamExt as _, stream};
use ignore::WalkBuilder;
use regex::Regex;
use std::collections::BTreeSet;
use std::fmt::Write as _;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use crate::{config, model, session};
/// Token budget targeted when packing files into a single review chunk.
const TARGET_CHUNK_TOKENS: usize = 64_000;
/// Repos that fit one chunk at or under this token count are audited in a single pass.
const SMALL_REPO_TOKENS: usize = 80_000;
/// Hard cap on chunk count; past this the audit refuses to run.
const MAX_REVIEW_CHUNKS: usize = 80;
/// Files larger than this many bytes are skipped entirely.
const MAX_FILE_BYTES: u64 = 512 * 1024;
/// Maximum number of lines recorded in the security-relevant keyword index.
const SECURITY_INDEX_LIMIT: usize = 160;
/// Per-chunk cap on candidate-findings tokens fed into the reduce prompt.
const FINDINGS_PER_CHUNK_LIMIT_TOKENS: usize = 6_000;
/// Number of chunk reviews run concurrently in the map phase.
const DEFAULT_AUDIT_PARALLELISM: usize = 8;
/// User-supplied parameters for a repository audit run.
#[derive(Debug, Clone)]
pub struct AuditOptions {
    /// Workspace root to walk for reviewable files.
    pub root: PathBuf,
    /// Model identifier passed through to the session layer.
    pub model: String,
    /// Optional extra focus text appended to the audit prompts (may be empty).
    pub focus: String,
    /// Output path for the markdown report, resolved against the workspace.
    pub out: PathBuf,
}
/// Summary of a completed audit run.
#[derive(Debug, Clone)]
pub struct AuditResult {
    /// Where the markdown report was written.
    pub output_path: PathBuf,
    /// Number of files included across all review chunks.
    pub file_count: usize,
    /// Number of review chunks produced.
    pub chunk_count: usize,
}
/// A single reviewable text file collected from the workspace.
#[derive(Debug, Clone)]
struct AuditFile {
    // Workspace-relative path using forward slashes (see `rel_path`).
    path: String,
    // Human-readable language name derived from the file extension.
    language: &'static str,
    // On-disk size in bytes.
    bytes: u64,
    // Estimated token count of `text` (clamped to at least 1).
    tokens: usize,
    // Full file contents (lossy UTF-8 conversion of the raw bytes).
    text: String,
}
/// A group of files reviewed together in one model call.
#[derive(Debug, Clone)]
struct AuditChunk {
    files: Vec<AuditFile>,
    // Sum of the member files' estimated token counts.
    tokens: usize,
}
/// Run a full repository audit and write the markdown report to the resolved
/// workspace output path.
///
/// Small repositories (a single chunk within `SMALL_REPO_TOKENS`) are reviewed
/// in one model call; larger repositories are reviewed chunk-by-chunk in
/// parallel (map) and then condensed into one report (reduce).
///
/// # Errors
/// Fails if no reviewable files are found, the chunk count exceeds
/// `MAX_REVIEW_CHUNKS`, any model call fails, or the report cannot be written.
pub async fn run(options: AuditOptions) -> Result<AuditResult> {
    let started = Instant::now();
    let model_spec = model::to_genai_model_spec(&options.model);
    let output_path = config::resolve_workspace_output_path(&options.root, &options.out)?;
    // The report file itself is excluded so a re-run does not audit its own output.
    let files = collect_files(&options.root, Some(&output_path), &model_spec)?;
    if files.is_empty() {
        bail!("no reviewable text files found for audit");
    }
    let manifest = build_manifest(&files);
    let index = build_security_index(&files);
    let chunks = chunk_files(files, TARGET_CHUNK_TOKENS);
    if chunks.len() > MAX_REVIEW_CHUNKS {
        bail!(
            "audit would require {} chunks; narrow the repo or raise chunking limits before running",
            chunks.len()
        );
    }
    let file_count = chunks.iter().map(|chunk| chunk.files.len()).sum::<usize>();
    let chunk_count = chunks.len();
    let progress = AuditProgress::new(started, file_count, chunk_count);
    progress.prepared();
    let system_prompt = prompts::audit_system_prompt();
    let report = if chunks.len() == 1 && chunks[0].tokens <= SMALL_REPO_TOKENS {
        // Small-repo path: one prompt containing the whole repository.
        let repo_text = chunk_text(&chunks[0]);
        let prompt = prompts::audit_full_prompt(&options.focus, &manifest, &index, &repo_text);
        progress.review_started(None);
        let report =
            session::run_prompt_once_no_tools(&options.model, &system_prompt, &prompt).await?;
        progress.review_finished(1);
        report
    } else {
        // Map phase: review chunks concurrently, bounded by DEFAULT_AUDIT_PARALLELISM.
        progress.review_started(Some(DEFAULT_AUDIT_PARALLELISM));
        let completed_chunks = Arc::new(AtomicUsize::new(0));
        let mut chunk_findings = stream::iter(chunks.iter().enumerate())
            .map(|(idx, chunk)| {
                let chunk_id = idx + 1;
                let prompt = prompts::audit_chunk_prompt(
                    &options.focus,
                    &manifest,
                    &index,
                    chunk_id,
                    chunk_count,
                    &chunk_text(chunk),
                );
                let model = &options.model;
                let system_prompt = &system_prompt;
                let completed_chunks = Arc::clone(&completed_chunks);
                async move {
                    let findings =
                        session::run_prompt_once_no_tools(model, system_prompt, &prompt).await?;
                    // Relaxed ordering is fine: the counter only drives progress output.
                    let completed = completed_chunks.fetch_add(1, Ordering::Relaxed) + 1;
                    progress.review_finished(completed);
                    Ok::<_, anyhow::Error>((chunk_id, findings))
                }
            })
            .buffer_unordered(DEFAULT_AUDIT_PARALLELISM)
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .collect::<Result<Vec<_>>>()?;
        // buffer_unordered yields completions out of order; restore chunk order.
        chunk_findings.sort_by_key(|(chunk_id, _)| *chunk_id);
        let mut candidate_findings = String::new();
        for (chunk_id, findings) in chunk_findings {
            // Cap each chunk's findings so the reduce prompt stays within budget.
            let compact = compact_to_tokens(
                &model_spec,
                findings.trim(),
                FINDINGS_PER_CHUNK_LIMIT_TOKENS,
            );
            let _ = writeln!(
                candidate_findings,
                "\n## Candidate findings from chunk {chunk_id}\n"
            );
            candidate_findings.push_str(compact.trim());
            candidate_findings.push('\n');
        }
        // Reduce phase: dedupe and rank candidate findings into the final report.
        let prompt = prompts::audit_reduce_prompt(&options.focus, &manifest, &candidate_findings);
        progress.summarise_started();
        let report =
            session::run_prompt_once_no_tools(&options.model, &system_prompt, &prompt).await?;
        progress.summarise_finished();
        report
    };
    // Post-process: stamp provenance and ensure a succinct findings summary.
    let report = with_transparency_line(&report, &transparency_snippet(&options));
    let report = with_succinct_findings_summary(&report);
    progress.write_started(&output_path);
    config::write_workspace_file(&output_path, report.as_bytes())?;
    progress.write_finished(&output_path);
    Ok(AuditResult {
        output_path,
        file_count,
        chunk_count,
    })
}
/// Emits progress lines for the audit pipeline via `crate::ui::progress`.
#[derive(Debug, Clone, Copy)]
struct AuditProgress {
    // Start time; every line reports elapsed duration relative to this.
    started: Instant,
    file_count: usize,
    chunk_count: usize,
}
impl AuditProgress {
    fn new(started: Instant, file_count: usize, chunk_count: usize) -> Self {
        Self {
            started,
            file_count,
            chunk_count,
        }
    }

    /// Announce that file collection and chunking finished.
    fn prepared(&self) {
        self.line(
            "prepared",
            1,
            1,
            format_args!("{} files · {} chunks", self.file_count, self.chunk_count),
        );
    }

    /// Announce the start of the review phase; `parallelism` is `Some` for the
    /// chunked map/reduce path and `None` for the single-pass path.
    fn review_started(&self, parallelism: Option<usize>) {
        let detail = match parallelism {
            Some(parallelism) => format!(
                "reviewing {} chunks · parallelism {parallelism}",
                self.chunk_count
            ),
            None => "reviewing full repo".to_string(),
        };
        self.line("review", 0, self.chunk_count, detail);
    }

    /// Report chunk-review progress. Intermediate updates are throttled to the
    /// stride below; the final completion line always prints.
    fn review_finished(&self, completed: usize) {
        if completed < self.chunk_count && !completed.is_multiple_of(self.review_update_stride()) {
            return;
        }
        let detail = if completed >= self.chunk_count {
            "review complete".to_string()
        } else {
            format!("{completed}/{} chunks complete", self.chunk_count)
        };
        self.line("review", completed, self.chunk_count, detail);
    }

    // Roughly one progress line per 10% of chunks, never less than every chunk.
    fn review_update_stride(&self) -> usize {
        self.chunk_count.div_ceil(10).max(1)
    }

    fn summarise_started(&self) {
        self.line("summarise", 0, 1, "deduping and ranking findings");
    }

    fn summarise_finished(&self) {
        self.line("summarise", 1, 1, "summary complete");
    }

    fn write_started(&self, output_path: &Path) {
        self.line("write", 0, 1, output_path.display());
    }

    fn write_finished(&self, output_path: &Path) {
        self.line("write", 1, 1, output_path.display());
    }

    // Single choke point for progress output so formatting stays consistent.
    fn line(&self, label: &str, current: usize, total: usize, detail: impl std::fmt::Display) {
        crate::ui::progress(label, current, total, detail, self.started.elapsed());
    }
}
/// Walk `root` and collect reviewable text files, sorted so security-relevant
/// and larger files come first (see `audit_priority`).
///
/// Skipped: gitignored entries, known build/lock artifacts, the report file
/// itself (`output_path`), files over `MAX_FILE_BYTES`, binary files (NUL
/// byte present), and files that are unreadable or effectively empty.
fn collect_files(
    root: &Path,
    output_path: Option<&Path>,
    model_spec: &str,
) -> Result<Vec<AuditFile>> {
    let mut files = Vec::new();
    // Canonicalize the output path once for comparison against walker paths.
    // NOTE(review): walker paths may be non-canonical (relative root, symlinks);
    // confirm this equality check actually matches in those cases.
    let output_path = output_path.and_then(|path| path.canonicalize().ok());
    let mut builder = WalkBuilder::new(root);
    builder
        .hidden(false)
        .git_ignore(true)
        .git_global(false)
        .git_exclude(true)
        .follow_links(false);
    for entry in builder.build() {
        let entry = entry.map_err(|err| anyhow::anyhow!(err))?;
        let path = entry.path();
        let Some(file_type) = entry.file_type() else {
            continue;
        };
        if !file_type.is_file() {
            continue;
        }
        let rel = rel_path(root, path)?;
        if should_skip_path(&rel) {
            continue;
        }
        // Never audit the report we are about to (re)write.
        if output_path.as_ref().is_some_and(|out| path == out) {
            continue;
        }
        // Metadata/read failures are treated as "not reviewable", not as errors.
        let meta = match fs::metadata(path) {
            Ok(meta) => meta,
            Err(_) => continue,
        };
        if meta.len() > MAX_FILE_BYTES {
            continue;
        }
        let raw = match fs::read(path) {
            Ok(raw) => raw,
            Err(_) => continue,
        };
        // A NUL byte is a cheap, reliable binary-file heuristic.
        if raw.contains(&0) {
            continue;
        }
        let text = String::from_utf8_lossy(&raw).to_string();
        if text.trim().is_empty() {
            continue;
        }
        let tokens = session::count_tokens(model_spec, &text).max(1);
        files.push(AuditFile {
            language: language_for_path(&rel),
            path: rel,
            bytes: meta.len(),
            tokens,
            text,
        });
    }
    files.sort_by_key(audit_priority);
    Ok(files)
}
/// Decide whether a workspace-relative path should be excluded from the audit.
///
/// Excludes well-known build/VCS/vendored directories at the repo root and
/// common dependency lockfiles anywhere in the tree; matching is
/// case-insensitive.
fn should_skip_path(path: &str) -> bool {
    const SKIP_PREFIXES: [&str; 5] = [".git/", "target/", "node_modules/", ".venv/", ".tmp/"];
    const LOCKFILES: [&str; 6] = [
        "cargo.lock",
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "uv.lock",
        "go.sum",
    ];
    let lower = path.to_ascii_lowercase();
    if SKIP_PREFIXES.iter().any(|prefix| lower.starts_with(prefix)) {
        return true;
    }
    // Check the basename against the lockfile table, regardless of directory.
    Path::new(path)
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| LOCKFILES.contains(&name.to_ascii_lowercase().as_str()))
}
/// Sort key for collected files: security-relevant paths first (rank 0),
/// then larger files first, then lowercase path for a stable tiebreak.
fn audit_priority(file: &AuditFile) -> (u8, std::cmp::Reverse<usize>, String) {
    let lower = file.path.to_ascii_lowercase();
    let rank = if security_path_score(&lower) { 0 } else { 1 };
    (rank, std::cmp::Reverse(file.tokens), lower)
}
fn security_path_score(path: &str) -> bool {
[
"auth",
"session",
"token",
"secret",
"crypto",
"password",
"policy",
"permission",
"admin",
"login",
"security",
"config",
"route",
"api",
"http",
"request",
"shell",
"command",
"process",
"file",
"path",
"upload",
"download",
"network",
]
.iter()
.any(|needle| path.contains(needle))
}
/// Greedily pack files into chunks of roughly `target_tokens` tokens.
///
/// A chunk is flushed when adding the next file would exceed the target and
/// the chunk is non-empty; a single file larger than the target therefore
/// still gets its own (oversized) chunk.
fn chunk_files(files: Vec<AuditFile>, target_tokens: usize) -> Vec<AuditChunk> {
    let mut chunks: Vec<AuditChunk> = Vec::new();
    let mut pending: Vec<AuditFile> = Vec::new();
    let mut pending_tokens = 0usize;
    for file in files {
        let would_overflow = !pending.is_empty() && pending_tokens + file.tokens > target_tokens;
        if would_overflow {
            chunks.push(AuditChunk {
                files: std::mem::take(&mut pending),
                tokens: pending_tokens,
            });
            pending_tokens = 0;
        }
        pending_tokens += file.tokens;
        pending.push(file);
    }
    // Flush the final partial chunk, if any.
    if !pending.is_empty() {
        chunks.push(AuditChunk {
            files: pending,
            tokens: pending_tokens,
        });
    }
    chunks
}
/// Render a compact repository manifest: counts, totals, sorted language
/// list, and the first 40 files (input is already priority-sorted).
fn build_manifest(files: &[AuditFile]) -> String {
    let total_tokens: usize = files.iter().map(|file| file.tokens).sum();
    let total_bytes: u64 = files.iter().map(|file| file.bytes).sum();
    // BTreeSet both dedupes and sorts the language names.
    let languages: BTreeSet<&'static str> = files.iter().map(|file| file.language).collect();
    let language_list = languages.into_iter().collect::<Vec<_>>().join(", ");
    let mut manifest = String::new();
    let _ = writeln!(manifest, "files: {}", files.len());
    let _ = writeln!(manifest, "estimated_tokens: {total_tokens}");
    let _ = writeln!(manifest, "bytes: {total_bytes}");
    let _ = writeln!(manifest, "languages: {language_list}");
    manifest.push_str("largest/security-prioritized files:\n");
    for file in files.iter().take(40) {
        let _ = writeln!(
            manifest,
            "- {} ({}; {} tokens; {} bytes)",
            file.path, file.language, file.tokens, file.bytes
        );
    }
    manifest
}
/// Build a grep-style index of security-relevant lines, formatted as
/// `- path:line: text`, capped at `SECURITY_INDEX_LIMIT` entries in total.
fn build_security_index(files: &[AuditFile]) -> String {
    // Substring keywords, matched against the lowercased line.
    let keywords = [
        "auth",
        "authorize",
        "permission",
        "role",
        "session",
        "token",
        "secret",
        "password",
        "key",
        "credential",
        "crypto",
        "encrypt",
        "decrypt",
        "sign",
        "verify",
        "path",
        "file",
        "canonical",
        "symlink",
        "upload",
        "download",
        "shell",
        "command",
        "process",
        "env",
        "http",
        "url",
        "fetch",
        "request",
        "deserialize",
        "unsafe",
        "eval",
        "admin",
    ];
    let mut out = String::new();
    let mut count = 0usize;
    // Labeled loop lets the global cap abort scanning mid-file.
    'files: for file in files {
        for (line_no, line) in file.text.lines().enumerate() {
            let lower = line.to_ascii_lowercase();
            if keywords.iter().any(|keyword| lower.contains(keyword)) {
                let trimmed = line.trim();
                if !trimmed.is_empty() {
                    let _ = writeln!(
                        out,
                        "- {}:{}: {}",
                        file.path,
                        line_no + 1, // 1-based line numbers for readability
                        crate::ui::truncate_chars(trimmed, 180)
                    );
                    count += 1;
                    if count >= SECURITY_INDEX_LIMIT {
                        break 'files;
                    }
                }
            }
        }
    }
    // Always return something so prompts never embed an empty section.
    if out.is_empty() {
        "- no keyword hits found".to_string()
    } else {
        out
    }
}
/// Render a chunk as markdown: each file under a `## path` heading followed
/// by its contents, with a guaranteed trailing newline per file.
fn chunk_text(chunk: &AuditChunk) -> String {
    let mut rendered = String::new();
    for file in &chunk.files {
        let _ = write!(rendered, "\n## {}\n\n{}", file.path, file.text);
        if !file.text.ends_with('\n') {
            rendered.push('\n');
        }
    }
    rendered
}
/// Return `text` unchanged when it fits within `max_tokens`; otherwise
/// return a shortened head+tail excerpt sized by a rough 4-chars-per-token
/// estimate (floored at 2000 chars). Borrows when no shortening is needed.
fn compact_to_tokens<'a>(
    model_spec: &str,
    text: &'a str,
    max_tokens: usize,
) -> std::borrow::Cow<'a, str> {
    use std::borrow::Cow;
    if session::count_tokens(model_spec, text) > max_tokens {
        let budget_chars = max_tokens.saturating_mul(4).max(2000);
        let (compacted, _) = crate::ui::head_tail(text, budget_chars);
        Cow::Owned(compacted)
    } else {
        Cow::Borrowed(text)
    }
}
/// Build the provenance line for the report: the reconstructed command line
/// plus the UTC date, prefixed with `AUDIT_TRANSPARENCY_PREFIX`.
fn transparency_snippet(options: &AuditOptions) -> String {
    let mut command = String::new();
    if !options.model.trim().is_empty() {
        let _ = write!(command, "OY_MODEL={} ", options.model.trim());
    }
    command.push_str("oy audit");
    // Only surface --out when it differs from the default report name.
    if options.out != Path::new("ISSUES.md") {
        let _ = write!(command, " --out {}", options.out.display());
    }
    if !options.focus.trim().is_empty() {
        command.push(' ');
        command.push_str(options.focus.trim());
    }
    format!(
        "> {} `{}` · {}",
        prompts::AUDIT_TRANSPARENCY_PREFIX,
        command,
        Utc::now().format("%Y-%m-%d")
    )
}
/// Normalise a report so it starts with the canonical title, a blank line,
/// and the transparency `snippet`.
///
/// Pre-existing transparency lines are dropped (re-runs must not stack
/// them), leading blank lines are trimmed, and consecutive blank lines after
/// the insertion point are collapsed to one.
pub(crate) fn with_transparency_line(report: &str, snippet: &str) -> String {
    let mut lines = report
        .lines()
        .filter(|line| !line.starts_with(&format!("> {}", prompts::AUDIT_TRANSPARENCY_PREFIX)))
        .collect::<Vec<_>>();
    while lines.first().is_some_and(|line| line.trim().is_empty()) {
        lines.remove(0);
    }
    // Ensure the canonical title is the very first line.
    if lines
        .first()
        .is_none_or(|line| line.trim() != prompts::AUDIT_REPORT_TITLE)
    {
        lines.insert(0, prompts::AUDIT_REPORT_TITLE);
    }
    // The snippet goes immediately after the title line.
    let insert_at = 1;
    let mut rebuilt = Vec::new();
    rebuilt.extend_from_slice(&lines[..insert_at]);
    rebuilt.push("");
    rebuilt.push(snippet);
    if lines.len() > insert_at {
        rebuilt.push("");
        for line in &lines[insert_at..] {
            // Skip a blank line when the previously emitted line was already blank.
            if !line.trim().is_empty() || rebuilt.last().is_some_and(|last| !last.trim().is_empty())
            {
                rebuilt.push(line);
            }
        }
    }
    finish_markdown(rebuilt)
}
/// Insert a `## Findings summary` section (one bullet per extracted finding)
/// right after the transparency line (or title), unless the report already
/// contains such a heading or no findings could be extracted.
pub(crate) fn with_succinct_findings_summary(report: &str) -> String {
    let lines = report.lines().collect::<Vec<_>>();
    // Respect a model-authored summary rather than duplicating it.
    if has_heading(&lines, "Findings summary") {
        return finish_markdown(lines);
    }
    let findings = extract_findings(&lines);
    if findings.is_empty() {
        return finish_markdown(lines);
    }
    let insert_at = transparency_insert_index(&lines);
    let mut rebuilt = Vec::with_capacity(lines.len() + findings.len() + 4);
    rebuilt.extend(lines[..insert_at].iter().map(|line| (*line).to_string()));
    // Separate the summary from preceding content with a blank line.
    if rebuilt.last().is_some_and(|line| !line.trim().is_empty()) {
        rebuilt.push(String::new());
    }
    rebuilt.push("## Findings summary".to_string());
    rebuilt.push(String::new());
    rebuilt.extend(findings.into_iter().map(|finding| finding.to_markdown()));
    rebuilt.push(String::new());
    rebuilt.extend(lines[insert_at..].iter().map(|line| (*line).to_string()));
    finish_markdown_owned(rebuilt)
}
/// One row of the findings summary: severity, short title, and code reference.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FindingSummary {
    severity: String,
    title: String,
    code_ref: String,
}

impl FindingSummary {
    /// Render this finding as a single markdown bullet:
    /// `- **Severity** `code_ref` — title`.
    fn to_markdown(&self) -> String {
        let mut bullet = String::from("- **");
        bullet.push_str(&self.severity);
        bullet.push_str("** `");
        bullet.push_str(&self.code_ref);
        bullet.push_str("` — ");
        bullet.push_str(&self.title);
        bullet
    }
}
/// Scan report lines for `##`–`####` headings and collect a `FindingSummary`
/// for each finding-like section that yields a code reference.
fn extract_findings(lines: &[&str]) -> Vec<FindingSummary> {
    static HEADING_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
        Regex::new(r"^(#{2,4})\s+(.+?)\s*$").expect("valid heading regex")
    });
    let mut findings = Vec::new();
    // (heading text, body lines) of the finding section currently being read.
    let mut current: Option<(String, Vec<&str>)> = None;
    for line in lines {
        if let Some(captures) = HEADING_RE.captures(line) {
            // A new heading closes the previous section; try to convert it.
            if let Some((heading, body)) = current.take()
                && let Some(finding) = finding_from_section(&heading, &body)
            {
                findings.push(finding);
            }
            let level = captures.get(1).map(|m| m.as_str().len()).unwrap_or(0);
            let heading = captures
                .get(2)
                .map(|m| m.as_str().trim().to_string())
                .unwrap_or_default();
            // Only track sections whose heading looks like an actual finding.
            if level >= 2 && is_finding_heading(&heading) {
                current = Some((heading, Vec::new()));
            } else {
                current = None;
            }
        } else if let Some((_, body)) = current.as_mut() {
            body.push(line);
        }
    }
    // Flush the final section, which has no trailing heading to close it.
    if let Some((heading, body)) = current.take()
        && let Some(finding) = finding_from_section(&heading, &body)
    {
        findings.push(finding);
    }
    findings
}
/// Convert a finding section into a summary row. Returns `None` when neither
/// the body nor the heading contains a code reference; severity falls back to
/// "Unrated" when no severity keyword is found anywhere.
fn finding_from_section(heading: &str, body: &[&str]) -> Option<FindingSummary> {
    // Prefer a code reference from the body; fall back to the heading itself.
    let mut code_ref = None;
    for line in body {
        code_ref = code_ref_from_line(line);
        if code_ref.is_some() {
            break;
        }
    }
    let code_ref = code_ref.or_else(|| code_ref_from_line(heading))?;
    // Severity: heading wins, then the first body line that mentions one.
    let mut severity = severity_from_text(heading);
    if severity.is_none() {
        severity = body.iter().find_map(|line| severity_from_text(line));
    }
    Some(FindingSummary {
        severity: severity.unwrap_or_else(|| "Unrated".to_string()),
        title: clean_finding_title(heading),
        code_ref,
    })
}
/// A heading counts as a finding unless it is one of the known structural
/// section names (case-insensitive).
fn is_finding_heading(heading: &str) -> bool {
    const NON_FINDING: [&str; 6] = [
        "findings summary",
        "summary",
        "detailed findings",
        "details",
        "no concrete findings",
        "audit issues",
    ];
    let lower = heading.to_ascii_lowercase();
    !NON_FINDING.contains(&lower.as_str())
}
/// Find the first severity keyword in `text` (case-insensitive, whole word)
/// and return its canonical form; `informational` collapses to `Info`.
fn severity_from_text(text: &str) -> Option<String> {
    static SEVERITY_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
        Regex::new(r"(?i)\b(critical|high|medium|low|info|informational)\b")
            .expect("valid severity regex")
    });
    let matched = SEVERITY_RE.captures(text)?.get(1)?;
    let canonical = match matched.as_str().to_ascii_lowercase().as_str() {
        "critical" => "Critical",
        "high" => "High",
        "medium" => "Medium",
        "low" => "Low",
        // "info" and "informational" both normalise to "Info".
        _ => "Info",
    };
    Some(canonical.to_string())
}
/// Strip a leading severity tag (e.g. `High:`, `[Low] -`) and markdown
/// heading residue from a finding heading; falls back to a placeholder when
/// nothing remains.
fn clean_finding_title(heading: &str) -> String {
    static TITLE_SEVERITY_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
        // Optional bracket/paren around the severity word, then a separator
        // run of `:`, em/en dashes, or hyphens.
        Regex::new(
            r"(?i)^\s*[\[(]?\s*(informational|critical|high|medium|low|info)\s*[\])]?\s*[:—–-]+\s*",
        )
        .expect("valid title severity regex")
    });
    let title = heading.trim().trim_matches('#').trim();
    let title = TITLE_SEVERITY_RE.replace(title, "").trim().to_string();
    if title.is_empty() {
        "Untitled finding".to_string()
    } else {
        title
    }
}
/// Extract the first code-reference-looking token from a line, e.g.
/// `src/files.rs:42` or `src/retry.rs::spin`, trimming stray punctuation
/// such as backticks and closing brackets.
fn code_ref_from_line(line: &str) -> Option<String> {
    static CODE_REF_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
        // path-ish token + extension, optional `:line`, optional `::symbol`.
        Regex::new(r"[A-Za-z0-9_.@+\-/]+\.[A-Za-z0-9]+(?::\d+)?(?:::[A-Za-z_][A-Za-z0-9_]*)?")
            .expect("valid code reference regex")
    });
    CODE_REF_RE.find(line).map(|match_| {
        match_
            .as_str()
            .trim_matches(|ch: char| ch == '`' || ch == ',' || ch == ')' || ch == ']')
            .to_string()
    })
}
/// True when any line, stripped of leading `#` markers and whitespace,
/// equals `heading` case-insensitively.
fn has_heading(lines: &[&str], heading: &str) -> bool {
    for line in lines {
        let stripped = line.trim_start_matches('#').trim();
        if stripped.eq_ignore_ascii_case(heading) {
            return true;
        }
    }
    false
}
/// Index at which new content should be inserted: just after the transparency
/// line if present, else just after the report title, else the very top.
fn transparency_insert_index(lines: &[&str]) -> usize {
    let marker = format!("> {}", prompts::AUDIT_TRANSPARENCY_PREFIX);
    if let Some(idx) = lines.iter().position(|line| line.starts_with(&marker)) {
        return idx + 1;
    }
    match lines
        .iter()
        .position(|line| line.trim() == prompts::AUDIT_REPORT_TITLE)
    {
        Some(idx) => idx + 1,
        None => 0,
    }
}
/// Join borrowed lines into a markdown document with a trailing newline.
fn finish_markdown(lines: Vec<&str>) -> String {
    format!("{}\n", lines.join("\n"))
}
/// Join owned lines into a markdown document with a trailing newline.
fn finish_markdown_owned(lines: Vec<String>) -> String {
    format!("{}\n", lines.join("\n"))
}
/// Resolve `path` to a workspace-relative, forward-slash string, rejecting
/// anything that escapes `root` (e.g. via symlinks).
///
/// Both sides are canonicalized before the containment check: the original
/// compared a canonicalized `path` against the raw `root`, so a relative
/// `root` (or a root behind a symlink, such as `/tmp` on macOS) made
/// `starts_with` fail and every file bailed with "path escaped workspace".
///
/// # Errors
/// Fails when either path cannot be canonicalized or when the resolved path
/// lies outside the resolved root.
fn rel_path(root: &Path, path: &Path) -> Result<String> {
    let root = root
        .canonicalize()
        .with_context(|| format!("failed resolving {}", root.display()))?;
    let resolved = path
        .canonicalize()
        .with_context(|| format!("failed resolving {}", path.display()))?;
    if !resolved.starts_with(&root) {
        bail!("path escaped workspace: {}", path.display());
    }
    Ok(resolved
        .strip_prefix(&root)?
        .to_string_lossy()
        .replace('\\', "/"))
}
/// Map a file path's extension (case-insensitive) to a human-readable
/// language name; anything unrecognised is reported as "Text".
fn language_for_path(path: &str) -> &'static str {
    let extension = Path::new(path)
        .extension()
        .map(|ext| ext.to_string_lossy().to_ascii_lowercase())
        .unwrap_or_default();
    match extension.as_str() {
        "rs" => "Rust",
        "py" => "Python",
        "go" => "Go",
        "js" | "mjs" | "cjs" => "JavaScript",
        "ts" | "tsx" => "TypeScript",
        "java" => "Java",
        "kt" | "kts" => "Kotlin",
        "swift" => "Swift",
        "rb" => "Ruby",
        "php" => "PHP",
        "cs" => "C#",
        "c" | "h" => "C",
        "cc" | "cpp" | "cxx" | "hpp" => "C++",
        "toml" => "TOML",
        "yaml" | "yml" => "YAML",
        "json" => "JSON",
        "md" => "Markdown",
        "sh" | "bash" => "Shell",
        _ => "Text",
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // The transparency snippet must land directly below the report title.
    #[test]
    fn transparency_line_is_inserted_after_title() {
        let out = with_transparency_line(
            "# Audit Issues\n\n## H1\n",
            "> Generated with [oy-cli](https://github.com/wagov-dtt/oy-cli): `oy audit`",
        );
        assert!(out.starts_with("# Audit Issues\n\n> Generated with [oy-cli]"));
        assert!(out.contains("## H1"));
    }

    // Summary bullets are synthesised from `### Severity: title` sections and
    // placed before the detailed findings.
    #[test]
    fn succinct_summary_is_inserted_from_detailed_findings() {
        let out = with_succinct_findings_summary(
            "# Audit Issues\n\n> Generated with [oy-cli](https://github.com/wagov-dtt/oy-cli): `oy audit`\n\n## Detailed findings\n\n### High: path traversal reaches file writes\n\n- Evidence: `src/files.rs:42` passes user input into write.\n- Fix: canonicalize under the workspace.\n\n### Low: noisy retry loop\n\n- Severity: Low\n- Evidence: `src/retry.rs::spin` retries without backoff.\n",
        );
        assert!(out.contains("## Findings summary"));
        assert!(out.contains("- **High** `src/files.rs:42` — path traversal reaches file writes"));
        assert!(out.contains("- **Low** `src/retry.rs::spin` — noisy retry loop"));
        assert!(out.find("## Findings summary") < out.find("## Detailed findings"));
    }

    // A report that already carries a summary heading is returned unchanged.
    #[test]
    fn existing_findings_summary_is_preserved() {
        let report =
            "# Audit Issues\n\n## Findings summary\n\n- **High** `src/lib.rs:1` — existing\n";
        assert_eq!(with_succinct_findings_summary(report), report);
    }

    // Greedy packing: 5+7 fills the 12-token chunk exactly; 4 spills over.
    #[test]
    fn chunking_keeps_files_under_target_when_possible() {
        let files = vec![
            AuditFile {
                path: "a.rs".into(),
                language: "Rust",
                bytes: 1,
                tokens: 5,
                text: "a".into(),
            },
            AuditFile {
                path: "b.rs".into(),
                language: "Rust",
                bytes: 1,
                tokens: 7,
                text: "b".into(),
            },
            AuditFile {
                path: "c.rs".into(),
                language: "Rust",
                bytes: 1,
                tokens: 4,
                text: "c".into(),
            },
        ];
        let chunks = chunk_files(files, 12);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].tokens, 12);
        assert_eq!(chunks[1].tokens, 4);
    }

    #[test]
    fn skips_lockfiles_and_build_dirs() {
        assert!(should_skip_path("target/debug/app"));
        assert!(should_skip_path("Cargo.lock"));
        assert!(!should_skip_path("src/main.rs"));
    }
}
// === Audit prompts ===
mod prompts {
use std::fmt::Write as _;
pub const AUDIT_REPORT_TITLE: &str = "# Audit Issues";
pub const AUDIT_TRANSPARENCY_PREFIX: &str =
"Generated with [oy-cli](https://github.com/wagov-dtt/oy-cli):";
pub const AUDIT_SYSTEM_PROMPT: &str = r#"You are oy in audit mode. Audit the repository for security issues, unnecessary complexity, and material usability or performance problems.
Be terse, evidence-first, and repo-specific. Avoid generic best-practice advice, style nits, and speculation.
Finding quality bar:
- Report only concrete issues with a plausible attack path, trigger, broken invariant, data exposure, integrity risk, privilege impact, or material operational impact.
- For vulnerabilities, include the trust boundary, sink, affected path/symbol evidence, impact, exploitability/preconditions, and a concrete fix.
- Prefer critical/high security findings and issues likely to cause production incidents.
- Prefer simple remediations that remove whole bug classes.
- Return [] or say no concrete findings for a chunk when evidence is weak.
- Final reports must include a succinct all-findings summary with code references, then detailed writeups for only the most severe 10-20 findings.
Use the embedded OWASP/grugbrain reference as a lightweight checklist and citation guide. Spend tokens on repository evidence, not long standards explanations."#;
pub const AUDIT_REFERENCE: &str = r#"Audit reference checklist:
OWASP ASVS 5.0 quick map:
- V1 Architecture: trust boundaries, secure design, attack surface, threat model gaps, dangerous defaults.
- V2 Authentication: credential handling, MFA, session/auth lifecycle, account recovery.
- V3 Session: cookie/token handling, fixation, expiration, revocation, CSRF-relevant state.
- V4 Access Control: object/function authorization, tenant isolation, confused deputy paths.
- V5 Validation: parser boundaries, canonicalization, path traversal, SSRF, injection, deserialization.
- V6 Cryptography: key management, weak/custom crypto, randomness, secret storage.
- V7 Error/Logging: secret leakage, unsafe diagnostics, audit trail gaps.
- V8 Data Protection: sensitive data at rest/in transit, retention, cache/backup exposure.
- V9 Communications: TLS verification, hostname validation, downgrade/debug transport.
- V10 Malicious Code: supply chain, unsafe dynamic loading, dependency/update risk.
- V11 Business Logic: state-machine bypass, race/double-submit, workflow abuse.
- V12 Files/Resources: upload/download, archive extraction, filesystem boundaries, quotas.
- V13 API/Web Service: mass assignment, schema validation, rate limits, authz on APIs.
- V14 Configuration: insecure defaults, debug flags, secret/config sprawl.
OWASP MASVS/MASWE for mobile repos only:
- STORAGE, CRYPTO, AUTH, NETWORK, PLATFORM, CODE, RESILIENCE, PRIVACY; use MASWE IDs only when a concrete mobile weakness maps cleanly.
Grugbrain complexity filter:
- Grugbrain has no formal section IDs; do not invent citations. Use exact lookup phrases only.
- Useful phrases: `complexity very bad`, `local reasoning`, `small sharp tools`, `avoid wrong abstraction`, `too much abstraction`, `closures like salt`, `reproduce bug first`, `testing`.
- Use grugbrain for complexity/maintainability findings, or as secondary support where complexity materially increases exploitability or review failure risk.
Combined heuristic:
- Security bug plus high complexity is higher priority because it is harder to review, fix safely, and prevent from recurring.
- Prefer findings where code both violates a security control and hides that violation behind abstraction, config sprawl, hidden state, or broad capability.
- If a simpler design removes an entire bug class, say so explicitly."#;
    /// Build the map-phase prompt for one chunk: per-chunk instructions plus
    /// the shared manifest, the security index, and this chunk's contents.
    pub fn audit_chunk_prompt(
        focus: &str,
        manifest: &str,
        index: &str,
        chunk_id: usize,
        chunk_count: usize,
        chunk_text: &str,
    ) -> String {
        let mut prompt = String::new();
        let _ = writeln!(prompt, "Review audit chunk {chunk_id}/{chunk_count}.");
        push_focus(&mut prompt, focus);
        prompt.push_str("\nReturn concise candidate findings for this chunk only. Use markdown with one `###` heading per finding, or return `[]` if there are no concrete findings. For each finding include severity, category, evidence path/symbol, trust boundary/sink when security-relevant, impact, reference, and fix. Do not write files.\n\n");
        prompt.push_str("Repository manifest:\n");
        prompt.push_str(manifest.trim());
        prompt.push_str("\n\nSecurity-relevant index:\n");
        prompt.push_str(index.trim());
        prompt.push_str("\n\nChunk contents:\n");
        prompt.push_str(chunk_text.trim());
        prompt
    }
    /// Build the single-pass prompt used when the whole repository fits in
    /// one chunk: instructions, report format, manifest, index, and contents.
    pub fn audit_full_prompt(focus: &str, manifest: &str, index: &str, repo_text: &str) -> String {
        let mut prompt = String::new();
        prompt.push_str("Conduct a full repository audit and return the final markdown report.\n");
        push_focus(&mut prompt, focus);
        prompt.push_str("\nReport format:\n1. Start with `# Audit Issues`.\n2. Add `## Findings summary` with one succinct bullet/table row for every concrete finding, including severity, short title, and code reference (`path:line` or `path::symbol`).\n3. Add `## Detailed findings` for only the most severe 10-20 findings, ranked by severity/exploitability/impact; include category, evidence, trust boundary/sink where security-relevant, impact, exploitability/preconditions, reference, and fix.\n4. Avoid generic advice. Do not write files.\n\n");
        prompt.push_str("Repository manifest:\n");
        prompt.push_str(manifest.trim());
        prompt.push_str("\n\nSecurity-relevant index:\n");
        prompt.push_str(index.trim());
        prompt.push_str("\n\nRepository contents:\n");
        prompt.push_str(repo_text.trim());
        prompt
    }
    /// Build the reduce-phase prompt that condenses per-chunk candidate
    /// findings into the final deduped, ranked report.
    pub fn audit_reduce_prompt(focus: &str, manifest: &str, findings: &str) -> String {
        let mut prompt = String::new();
        prompt.push_str("Condense candidate audit findings into the final markdown report.\n");
        push_focus(&mut prompt, focus);
        prompt.push_str("\nReport format:\n1. Start with `# Audit Issues`.\n2. Add `## Findings summary` with one succinct bullet/table row for every concrete finding that survives dedupe, including severity, short title, and code reference (`path:line` or `path::symbol`).\n3. Add `## Detailed findings` for only the most severe 10-20 findings, ranked by severity/exploitability/impact; preserve the shortest evidence needed to prove exploitability or impact, plus category, trust boundary/sink where security-relevant, reference, and fix.\n4. Drop weak/speculative/duplicate items, but do not omit concrete lower-severity findings from the summary.\n\n");
        prompt.push_str("Repository manifest:\n");
        prompt.push_str(manifest.trim());
        prompt.push_str("\n\nCandidate findings:\n");
        prompt.push_str(findings.trim());
        prompt
    }
fn push_focus(out: &mut String, focus: &str) {
let focus = focus.trim();
if !focus.is_empty() {
let _ = writeln!(out, "Additional focus: {focus}");
}
}
pub fn audit_system_prompt() -> String {
format!(
"{}\n\n{}",
AUDIT_SYSTEM_PROMPT.trim(),
AUDIT_REFERENCE.trim()
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn audit_system_prompt_embeds_owasp_and_grugbrain_reference() {
let prompt = audit_system_prompt();
assert!(prompt.contains("OWASP ASVS 5.0"));
assert!(prompt.contains("Grugbrain"));
assert!(prompt.contains("complexity very bad"));
assert!(prompt.contains("trust boundary"));
}
#[test]
fn audit_prompts_include_focus_when_present() {
let prompt = audit_full_prompt("auth paths", "files: 1", "- hit", "src/lib.rs");
assert!(prompt.contains("Additional focus: auth paths"));
assert!(prompt.contains("# Audit Issues"));
}
#[test]
fn final_audit_prompts_request_succinct_summary_and_limited_details() {
let full = audit_full_prompt("", "files: 1", "- hit", "src/lib.rs");
assert!(full.contains("## Findings summary"));
assert!(full.contains("every concrete finding"));
assert!(full.contains("most severe 10-20"));
let reduce = audit_reduce_prompt("", "files: 1", "### High\nEvidence: src/lib.rs:1");
assert!(reduce.contains("## Findings summary"));
assert!(reduce.contains("do not omit concrete lower-severity findings"));
assert!(reduce.contains("most severe 10-20"));
}
}
}