// codio 0.1.1
//
// Production-ready commit message generator using local Ollama LLM.
// Documentation
use std::collections::HashSet;
use std::path::Path;

/// Placeholder written to the sanitized output in place of any diff line
/// that matches one of the secret patterns.
const REDACTION_MARKER: &str = "[REDACTED SECRET LINE]";

/// Outcome of running a diff through [`sanitize_diff`].
///
/// `Clone`, `Default`, `PartialEq` and `Eq` are derived so results can be
/// copied, built empty, and compared directly in assertions.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SanitizedDiff {
    /// The sanitized diff text, including any notice lines prepended for
    /// sensitive files.
    pub diff: String,
    /// Number of individual lines that were replaced by the redaction marker.
    pub redacted_lines: usize,
    /// Paths that were classified as sensitive, sorted ascending.
    pub omitted_files: Vec<String>,
}

pub fn sanitize_diff(diff: &str, changed_files: &[String]) -> SanitizedDiff {
    let sensitive_files: HashSet<String> = changed_files
        .iter()
        .filter(|path| is_sensitive_file(path))
        .cloned()
        .collect();

    let mut output = String::new();
    let mut section = Vec::new();
    let mut current_file: Option<String> = None;
    let mut redacted_lines = 0usize;

    for line in diff.lines() {
        if line.starts_with("diff --git ") {
            flush_diff_section(
                &mut output,
                &section,
                current_file.as_deref(),
                &sensitive_files,
                &mut redacted_lines,
            );
            section.clear();
            current_file = parse_diff_file_path(line);
        }
        section.push(line.to_string());
    }

    flush_diff_section(
        &mut output,
        &section,
        current_file.as_deref(),
        &sensitive_files,
        &mut redacted_lines,
    );

    let mut omitted_files: Vec<String> = sensitive_files.iter().cloned().collect();
    omitted_files.sort();

    if !omitted_files.is_empty() {
        output = format!(
            "{}\n\n{}",
            omitted_files
                .iter()
                .map(|f| format!("Sensitive file changed (content omitted): {f}"))
                .collect::<Vec<_>>()
                .join("\n"),
            output
        );
    }

    SanitizedDiff {
        diff: output,
        redacted_lines,
        omitted_files,
    }
}

/// Appends one buffered diff section to `out`, sanitizing it on the way.
///
/// * An empty `section` writes nothing.
/// * If `file` is present in `sensitive_files`, the section is replaced by a
///   two-line stub (a `diff --git` header plus an "omitted" hunk marker) and
///   none of its lines are copied.
/// * Otherwise every line is copied followed by `\n`; lines flagged by
///   `line_contains_secret` are swapped for `REDACTION_MARKER` and counted in
///   `redacted_lines`.
fn flush_diff_section(
    out: &mut String,
    section: &[String],
    file: Option<&str>,
    sensitive_files: &HashSet<String>,
    redacted_lines: &mut usize,
) {
    if section.is_empty() {
        return;
    }

    match file {
        Some(path) if sensitive_files.contains(path) => {
            let stub = format!(
                "diff --git a/{path} b/{path}\n@@ content omitted for sensitive file @@\n"
            );
            out.push_str(&stub);
        }
        _ => {
            for line in section {
                let flagged = line_contains_secret(line);
                out.push_str(if flagged {
                    REDACTION_MARKER
                } else {
                    line.as_str()
                });
                out.push('\n');
                if flagged {
                    *redacted_lines += 1;
                }
            }
        }
    }
}

/// Extracts the new-side ("b") path from a `diff --git a/<old> b/<new>` header.
///
/// Paths may contain spaces, so the header cannot simply be split on
/// whitespace. Three strategies are tried in order:
///
/// 1. Both sides name the same path (`a/P b/P`): the header splits exactly in
///    the middle, which is unambiguous whatever characters `P` contains.
/// 2. The sides differ (rename/copy): split at the last ` b/`.
/// 3. Anything else (custom prefixes, quoted paths): fall back to the second
///    whitespace-separated token, with a leading `b/` stripped.
///
/// Returns `None` when the line does not start with `diff --git ` or carries
/// fewer than two path tokens.
fn parse_diff_file_path(line: &str) -> Option<String> {
    let rest = line.strip_prefix("diff --git ")?.trim();

    if let Some(path) = identical_sides_path(rest) {
        return Some(path.to_string());
    }

    if let Some((_, new_path)) = rest.rsplit_once(" b/") {
        return Some(new_path.to_string());
    }

    rest.split_whitespace().nth(1).map(normalize_diff_path)
}

/// Returns `P` when `rest` has exactly the shape `a/P b/P`.
fn identical_sides_path(rest: &str) -> Option<&str> {
    // `a/P` and `b/P` have equal length, so with the single separating space
    // the total length is odd and the space sits exactly in the middle.
    if rest.len() % 2 == 0 {
        return None;
    }
    let mid = rest.len() / 2;
    if rest.as_bytes().get(mid) != Some(&b' ') {
        return None;
    }

    // The byte at `mid` is an ASCII space, so both cuts are char boundaries.
    let old = rest.get(..mid)?.strip_prefix("a/")?;
    let new = rest.get(mid + 1..)?.strip_prefix("b/")?;
    if old == new {
        Some(new)
    } else {
        None
    }
}

/// Drops a leading `b/` from `path` if present; otherwise returns it unchanged.
fn normalize_diff_path(path: &str) -> String {
    path.strip_prefix("b/").unwrap_or(path).to_string()
}

/// Decides from the path alone whether a file's content should be withheld.
///
/// Matching is ASCII case-insensitive. A path is sensitive when any of these
/// holds:
///
/// * the path contains `.env` or `credentials` anywhere;
/// * the file name contains `secret`;
/// * the file name ends with a key/certificate extension
///   (`.pem`, `.p12`, `.pfx`, `.key`);
/// * the file name is one of the default SSH private-key names
///   (`id_rsa`, `id_dsa`, `id_ecdsa`, `id_ecdsa_sk`, `id_ed25519`,
///   `id_ed25519_sk`). Public halves such as `id_rsa.pub` do not match.
fn is_sensitive_file(path: &str) -> bool {
    const KEY_MATERIAL_SUFFIXES: [&str; 4] = [".pem", ".p12", ".pfx", ".key"];
    const SSH_PRIVATE_KEY_NAMES: [&str; 6] = [
        "id_rsa",
        "id_dsa",
        "id_ecdsa",
        "id_ecdsa_sk",
        "id_ed25519",
        "id_ed25519_sk",
    ];

    let path_lower = path.to_ascii_lowercase();
    let file_name = Path::new(path)
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("")
        .to_ascii_lowercase();

    // The substring test on the whole path also covers file names that merely
    // end in `.env`, so no separate suffix check is needed for it.
    path_lower.contains(".env")
        || path_lower.contains("credentials")
        || file_name.contains("secret")
        || KEY_MATERIAL_SUFFIXES
            .iter()
            .any(|&suffix| file_name.ends_with(suffix))
        || SSH_PRIVATE_KEY_NAMES.contains(&file_name.as_str())
}

/// Reports whether `line` looks like it carries a secret.
///
/// The line is upper-cased (ASCII only) and searched for a fixed list of
/// markers; the first hit is enough. Markers ending in `=` only match when
/// the `=` follows the keyword directly.
fn line_contains_secret(line: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "API_KEY=",
        "SECRET",
        "PRIVATE KEY",
        "TOKEN=",
        "PASSWORD=",
        "ACCESS_KEY",
    ];

    let haystack = line.to_ascii_uppercase();
    for marker in MARKERS.iter() {
        if haystack.contains(*marker) {
            return true;
        }
    }
    false
}

#[cfg(test)]
mod tests {
    use super::sanitize_diff;

    /// A change to `.env` is listed as omitted and its content is stubbed out.
    #[test]
    fn sensitive_files_are_omitted() {
        let changed = [String::from(".env")];
        let result = sanitize_diff("diff --git a/.env b/.env\n+API_KEY=abc\n", &changed);

        assert_eq!(result.omitted_files, vec![String::from(".env")]);
        assert!(result.diff.contains("content omitted"));
    }

    /// In an ordinary file only the line matching a secret pattern is redacted.
    #[test]
    fn lines_with_secret_patterns_are_redacted() {
        let changed = [String::from("src/app.rs")];
        let input = "diff --git a/src/app.rs b/src/app.rs\n+const TOKEN=\"abc\";\n+let x = 1;\n";
        let result = sanitize_diff(input, &changed);

        assert_eq!(result.redacted_lines, 1);
        assert!(result.diff.contains("[REDACTED SECRET LINE]"));
    }
}