pub mod anthropic;
pub mod ollama;
use std::path::Path;
use anyhow::Result;
use async_trait::async_trait;
use tracing::warn;
use cartomancer_core::config::{KnowledgeConfig, LlmConfig};
use cartomancer_core::finding::Finding;
use crate::path_security::validate_path_within;
#[async_trait]
pub trait LlmProvider: Send + Sync {
    /// Short identifier for this provider (e.g. "ollama", "anthropic").
    fn name(&self) -> &str;

    /// Verify the backend is reachable and ready to serve requests.
    async fn health_check(&self) -> Result<()>;

    /// Send a prompt to the backend and return the raw completion text.
    async fn complete(&self, prompt: &str) -> Result<String>;

    /// Enrich `finding` in place: ask the model for an impact analysis and an
    /// optional diff, then populate `llm_analysis`, `suggested_fix`, and —
    /// only when a fix was produced — `agent_prompt`.
    async fn deepen(&self, finding: &mut Finding) -> Result<()> {
        let response = self.complete(&build_deepening_prompt(finding, "")).await?;
        let (analysis, fix) = parse_llm_response(&response);
        finding.llm_analysis = Some(analysis);
        if let Some(diff) = fix.as_deref() {
            finding.agent_prompt = Some(build_agent_prompt(finding, diff));
        }
        finding.suggested_fix = fix;
        Ok(())
    }
}
/// Construct the LLM backend selected by `config`.
///
/// Missing optional settings fall back to built-in defaults. The Anthropic
/// arm validates `max_tokens` up front and requires an API key from either
/// the config or the `ANTHROPIC_API_KEY` environment variable.
pub fn create_provider(
    config: &LlmConfig,
    system_prompt: Option<&str>,
) -> Result<Box<dyn LlmProvider>> {
    use cartomancer_core::config::LlmBackend;

    // Owned copy of the optional system prompt, shared by both arms.
    let system = system_prompt.map(str::to_string);
    match config.provider {
        LlmBackend::Ollama => {
            let base_url = config
                .ollama_base_url
                .as_deref()
                .unwrap_or("http://localhost:11434");
            let model = config.ollama_model.as_deref().unwrap_or("gemma4");
            Ok(Box::new(ollama::OllamaProvider::new(base_url, model, system)))
        }
        LlmBackend::Anthropic => {
            // Reject out-of-range token budgets before touching credentials.
            anthropic::AnthropicProvider::validate_max_tokens(config.max_tokens)?;
            let api_key = config
                .anthropic_api_key
                .clone()
                .or_else(|| std::env::var("ANTHROPIC_API_KEY").ok())
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "Anthropic API key required: set llm.anthropic_api_key or ANTHROPIC_API_KEY"
                    )
                })?;
            let model = config
                .anthropic_model
                .as_deref()
                .unwrap_or("claude-sonnet-4-20250514");
            Ok(Box::new(anthropic::AnthropicProvider::new(
                &api_key,
                model,
                config.max_tokens,
                system,
            )?))
        }
    }
}
/// Load the optional company-knowledge file named in `config`.
///
/// Strictly best-effort: any failure (unset/empty path, path escaping
/// `work_dir`, read error, binary-looking content) logs a warning and
/// yields an empty string. Content is capped at `max_knowledge_chars`
/// characters.
pub fn load_knowledge(work_dir: &Path, config: &KnowledgeConfig) -> String {
    let Some(ref path) = config.knowledge_file else {
        return String::new();
    };
    if path.is_empty() {
        return String::new();
    }
    // Reject paths that resolve outside the working directory.
    let full_path = match validate_path_within(work_dir, path) {
        Ok(p) => p,
        Err(e) => {
            warn!(path = %path, err = %e, "knowledge file path rejected");
            return String::new();
        }
    };
    let content = match std::fs::read_to_string(&full_path) {
        Ok(c) => c,
        Err(e) => {
            warn!(path = %path, full_path = %full_path.display(), err = %e, "failed to read knowledge file");
            return String::new();
        }
    };
    // A NUL byte in the first 1 KiB is a strong hint the file is binary.
    if content.as_bytes().iter().take(1024).any(|&b| b == 0) {
        warn!(path = %path, "knowledge file appears to be binary, skipping");
        return String::new();
    }
    // Truncate at the max character count without re-collecting when the
    // content already fits.
    match content.char_indices().nth(config.max_knowledge_chars) {
        Some((byte_idx, _)) => content[..byte_idx].to_string(),
        None => content,
    }
}
/// Build the prompt asking the LLM to analyze one finding.
///
/// Sections emitted: the finding's core fields, the enclosing function
/// (capped near 2000 bytes), structural graph context when present, and an
/// optional company-context section. Ends with the task instructions that
/// request a concise analysis plus an optional ```diff fix block.
pub fn build_deepening_prompt(finding: &Finding, company_context: &str) -> String {
    let mut prompt = format!(
        "Analyze this code finding and explain its impact.\n\n\
         ## Finding\n\
         Rule: {}\n\
         Message: {}\n\
         Severity: {}\n\
         File: {}:{}\n\
         Code: {}\n",
        finding.rule_id,
        finding.message,
        finding.severity,
        finding.file_path,
        finding.start_line,
        finding.snippet,
    );
    if let Some(enclosing) = &finding.enclosing_context {
        let truncated = if enclosing.len() > 2000 {
            // BUGFIX: `enclosing[..2000]` panics when byte 2000 falls inside
            // a multi-byte UTF-8 sequence. Walk back to the nearest char
            // boundary first; boundary 0 is always valid so the loop ends.
            let mut boundary = 2000;
            while !enclosing.is_char_boundary(boundary) {
                boundary -= 1;
            }
            format!("{} [truncated]", &enclosing[..boundary])
        } else {
            enclosing.clone()
        };
        prompt.push_str(&format!("\n## Enclosing Function\n```\n{truncated}\n```\n"));
    }
    if let Some(ctx) = &finding.graph_context {
        prompt.push_str(&format!(
            "\n## Structural Context\n\
             Symbol: {}\n\
             Blast radius: {} symbols affected\n\
             Callers ({}):\n",
            ctx.symbol_name.as_deref().unwrap_or("unknown"),
            ctx.blast_radius,
            ctx.callers.len(),
        ));
        // Cap the caller list so huge blast radii don't blow up the prompt.
        for caller in ctx.callers.iter().take(10) {
            prompt.push_str(&format!(" - {caller}\n"));
        }
        if !ctx.domain_tags.is_empty() {
            prompt.push_str(&format!("Domain: {}\n", ctx.domain_tags.join(", ")));
        }
    }
    if !company_context.is_empty() {
        prompt.push_str(&format!("\n## Company Context\n{company_context}\n"));
    }
    prompt.push_str(
        "\n## Task\n\
         First, explain why this finding matters given the callers and blast radius above.\n\
         Be concise (2-3 sentences). Focus on the real-world impact.\n\n\
         Then, provide a suggested fix inside a ```diff fenced code block.\n\
         If no fix is appropriate, omit the diff block.",
    );
    prompt
}
/// Split an LLM response into (analysis text, optional diff fix).
///
/// The fix is the content of the first fenced ```diff block with a matching
/// closing ``` line; everything before and after the block (joined with a
/// blank line) becomes the analysis. With no fence — or an unclosed one —
/// the whole trimmed response is the analysis and the fix is `None`.
/// A whitespace-only fix normalizes to `None`.
pub fn parse_llm_response(response: &str) -> (String, Option<String>) {
    // Earliest ```diff opening fence, accepting LF or CRLF endings.
    let fence_start_markers = ["```diff\n", "```diff\r\n"];
    let mut fence_start: Option<(usize, usize)> = None; // (byte pos, marker len)
    for marker in &fence_start_markers {
        if let Some(pos) = response.find(marker) {
            if fence_start.is_none_or(|(best, _)| pos < best) {
                fence_start = Some((pos, marker.len()));
            }
        }
    }
    let Some((start_pos, marker_len)) = fence_start else {
        return (response.trim().to_string(), None);
    };
    let content_start = start_pos + marker_len;
    // Scan line by line for the closing fence. Record its offset AND its
    // full line length: the old `offset + 3` computation assumed a bare
    // "```\n" line, so a padded closing fence (e.g. " ```\n") leaked stray
    // characters into the trailing analysis text.
    let mut closing: Option<(usize, usize)> = None; // (offset, line len)
    let mut offset = 0;
    for line in response[content_start..].split_inclusive('\n') {
        if line.trim() == "```" {
            closing = Some((offset, line.len()));
            break;
        }
        offset += line.len();
    }
    let Some((closing_offset, closing_len)) = closing else {
        return (response.trim().to_string(), None);
    };
    let fix = &response[content_start..content_start + closing_offset];
    let fence_end = content_start + closing_offset + closing_len;
    let before = response[..start_pos].trim();
    let after = response[fence_end..].trim();
    let analysis = match (before.is_empty(), after.is_empty()) {
        (true, true) => String::new(),
        (false, true) => before.to_string(),
        (true, false) => after.to_string(),
        (false, false) => format!("{before}\n\n{after}"),
    };
    let fix_opt = if fix.trim().is_empty() {
        None
    } else {
        Some(fix.to_string())
    };
    (analysis, fix_opt)
}
/// Render a single-message instruction for a coding agent: location span,
/// message, rule, severity, optional CWE, then the suggested diff to apply.
pub fn build_agent_prompt(finding: &Finding, suggested_fix: &str) -> String {
    let mut prompt = format!(
        "In `@{}` around lines {}-{}, {}. Rule: {}.",
        finding.file_path,
        finding.start_line,
        finding.end_line,
        finding.message,
        finding.rule_id,
    );
    prompt.push_str(&format!(" Severity: {}.", finding.severity));
    // CWE is optional metadata; only mention it when present.
    if let Some(cwe) = finding.cwe.as_deref() {
        prompt.push_str(&format!(" CWE: {cwe}."));
    }
    prompt.push_str(&format!(" Apply this fix:\n\n```diff\n{suggested_fix}\n```"));
    prompt
}
#[cfg(test)]
mod tests {
    use super::*;
    use cartomancer_core::severity::Severity;

    // Minimal finding fixture: required fields set, every optional field unset.
    fn make_finding() -> Finding {
        Finding {
            rule_id: "test.rule".into(),
            message: "test message".into(),
            severity: Severity::Error,
            file_path: "src/lib.rs".into(),
            start_line: 10,
            end_line: 12,
            snippet: "let x = dangerous();".into(),
            cwe: None,
            graph_context: None,
            llm_analysis: None,
            escalation_reasons: vec![],
            is_new: None,
            enclosing_context: None,
            suggested_fix: None,
            agent_prompt: None,
        }
    }

    // --- build_deepening_prompt -------------------------------------------

    #[test]
    fn build_prompt_includes_finding_details() {
        let f = make_finding();
        let prompt = build_deepening_prompt(&f, "");
        assert!(prompt.contains("test.rule"));
        assert!(prompt.contains("test message"));
        assert!(prompt.contains("src/lib.rs:10"));
        assert!(prompt.contains("let x = dangerous();"));
    }

    #[test]
    fn build_prompt_with_enclosing_context() {
        let mut f = make_finding();
        f.enclosing_context = Some("fn handler() {\n let x = dangerous();\n}".into());
        let prompt = build_deepening_prompt(&f, "");
        assert!(prompt.contains("## Enclosing Function"));
        assert!(prompt.contains("fn handler()"));
    }

    #[test]
    fn build_prompt_without_enclosing_context() {
        let f = make_finding();
        let prompt = build_deepening_prompt(&f, "");
        assert!(!prompt.contains("Enclosing Function"));
    }

    #[test]
    fn build_prompt_truncates_long_enclosing_context() {
        let mut f = make_finding();
        // 3000 ASCII bytes: truncation cap is 2000 bytes.
        let long_ctx = "x".repeat(3000);
        f.enclosing_context = Some(long_ctx);
        let prompt = build_deepening_prompt(&f, "");
        assert!(prompt.contains("[truncated]"));
        assert!(prompt.contains(&"x".repeat(2000)));
        assert!(!prompt.contains(&"x".repeat(2001)));
    }

    // --- parse_llm_response ------------------------------------------------

    #[test]
    fn parse_response_with_diff_block() {
        let response = "This is dangerous because of SQL injection.\n\n\
```diff\n\
-let q = format!(\"SELECT * FROM t WHERE id={}\", id);\n\
+let q = sqlx::query(\"SELECT * FROM t WHERE id=?\").bind(id);\n\
```\n\
Some trailing note.";
        let (analysis, fix) = parse_llm_response(response);
        assert!(analysis.contains("SQL injection"));
        assert!(analysis.contains("trailing note"));
        let fix = fix.expect("should have a fix");
        assert!(fix.contains("-let q = format!"));
        assert!(fix.contains("+let q = sqlx::query"));
    }

    #[test]
    fn parse_response_without_diff_block() {
        let response = "This finding has low impact, no fix needed.";
        let (analysis, fix) = parse_llm_response(response);
        assert_eq!(analysis, "This finding has low impact, no fix needed.");
        assert!(fix.is_none());
    }

    #[test]
    fn parse_response_with_empty_diff_block() {
        let response = "Analysis here.\n\n```diff\n```";
        let (analysis, fix) = parse_llm_response(response);
        assert_eq!(analysis, "Analysis here.");
        assert!(fix.is_none(), "empty diff block should normalize to None");
    }

    #[test]
    fn parse_response_with_text_before_and_after_diff() {
        let response = "Before text.\n\n```diff\n-old\n+new\n```\n\nAfter text.";
        let (analysis, fix) = parse_llm_response(response);
        assert!(analysis.contains("Before text."));
        assert!(analysis.contains("After text."));
        let fix = fix.unwrap();
        assert!(fix.contains("-old"));
        assert!(fix.contains("+new"));
    }

    #[test]
    fn parse_response_backticks_inside_diff_not_treated_as_closing() {
        // A ``` inside a line must not count — only a bare ``` line closes.
        let response = "Analysis.\n\n```diff\n-let s = \"```\";\n+let s = \"fixed\";\n```\n";
        let (analysis, fix) = parse_llm_response(response);
        assert_eq!(analysis, "Analysis.");
        let fix = fix.expect("should extract diff");
        assert!(fix.contains("-let s = \"```\";"));
        assert!(fix.contains("+let s = \"fixed\";"));
    }

    // --- build_agent_prompt ------------------------------------------------

    #[test]
    fn build_agent_prompt_includes_required_fields() {
        let f = make_finding();
        let fix = "-old line\n+new line";
        let prompt = build_agent_prompt(&f, fix);
        assert!(prompt.contains("@src/lib.rs"));
        assert!(prompt.contains("lines 10-12"));
        assert!(prompt.contains("test.rule"));
        assert!(prompt.contains("test message"));
        assert!(prompt.contains("```diff\n-old line\n+new line\n```"));
        assert!(prompt.contains("Severity: error"));
    }

    #[test]
    fn build_agent_prompt_includes_cwe_when_present() {
        let mut f = make_finding();
        f.cwe = Some("CWE-89".into());
        let prompt = build_agent_prompt(&f, "-old\n+new");
        assert!(prompt.contains("CWE: CWE-89"));
    }

    // --- UTF-8 truncation safety ------------------------------------------

    #[test]
    fn build_prompt_truncates_multibyte_utf8_safely() {
        let mut f = make_finding();
        // 1500 two-byte chars = 3000 bytes; must not panic on truncation.
        let ctx = "é".repeat(1500); f.enclosing_context = Some(ctx);
        let prompt = build_deepening_prompt(&f, "");
        assert!(prompt.contains("[truncated]"));
    }

    #[test]
    fn build_prompt_truncates_4byte_emoji_safely() {
        let mut f = make_finding();
        // 501 four-byte chars = 2004 bytes, just past the 2000-byte cap.
        let ctx = "🔥".repeat(501);
        assert_eq!(ctx.len(), 2004);
        f.enclosing_context = Some(ctx);
        let prompt = build_deepening_prompt(&f, "");
        assert!(prompt.contains("[truncated]"));
        assert!(prompt.contains(&"🔥".repeat(500)));
    }

    #[test]
    fn parse_response_multiple_diff_blocks_takes_first() {
        let response = "Analysis.\n\n```diff\n-first\n+fixed\n```\n\nMore text.\n\n```diff\n-second\n+also fixed\n```";
        let (analysis, fix) = parse_llm_response(response);
        let fix = fix.expect("should extract first diff");
        assert!(fix.contains("-first"));
        assert!(!fix.contains("-second"));
        assert!(analysis.contains("Analysis."));
    }

    // --- deepen() via a canned-response mock provider ---------------------

    // Provider stub that returns a fixed completion string.
    struct MockProvider {
        response: String,
    }

    #[async_trait::async_trait]
    impl LlmProvider for MockProvider {
        fn name(&self) -> &str {
            "mock"
        }
        async fn health_check(&self) -> anyhow::Result<()> {
            Ok(())
        }
        async fn complete(&self, _prompt: &str) -> anyhow::Result<String> {
            Ok(self.response.clone())
        }
    }

    #[tokio::test]
    async fn deepen_trait_method_parses_and_populates_fields() {
        let provider = MockProvider {
            response: "This is dangerous.\n\n```diff\n-old\n+new\n```\n".into(),
        };
        let mut f = make_finding();
        provider.deepen(&mut f).await.unwrap();
        assert_eq!(f.llm_analysis.as_deref(), Some("This is dangerous."));
        assert_eq!(f.suggested_fix.as_deref(), Some("-old\n+new\n"));
        assert!(f.agent_prompt.is_some());
        assert!(f.agent_prompt.as_ref().unwrap().contains("@src/lib.rs"));
    }

    // --- create_provider validation ---------------------------------------

    #[test]
    fn create_provider_rejects_zero_max_tokens() {
        let config = LlmConfig {
            provider: cartomancer_core::config::LlmBackend::Anthropic,
            anthropic_api_key: Some("sk-test".into()),
            max_tokens: 0,
            ..Default::default()
        };
        match create_provider(&config, None) {
            Ok(_) => panic!("should reject max_tokens=0"),
            Err(e) => assert!(e.to_string().contains("must be between"), "{e}"),
        }
    }

    #[test]
    fn create_provider_rejects_excessive_max_tokens() {
        let config = LlmConfig {
            provider: cartomancer_core::config::LlmBackend::Anthropic,
            anthropic_api_key: Some("sk-test".into()),
            max_tokens: 200_000,
            ..Default::default()
        };
        match create_provider(&config, None) {
            Ok(_) => panic!("should reject max_tokens=200000"),
            Err(e) => assert!(e.to_string().contains("must be between"), "{e}"),
        }
    }

    #[test]
    fn create_provider_accepts_valid_max_tokens() {
        let config = LlmConfig {
            provider: cartomancer_core::config::LlmBackend::Anthropic,
            anthropic_api_key: Some("sk-test".into()),
            max_tokens: 4096,
            ..Default::default()
        };
        let provider = create_provider(&config, None).unwrap();
        assert_eq!(provider.name(), "anthropic");
    }

    #[tokio::test]
    async fn deepen_trait_method_no_fix_leaves_fields_none() {
        let provider = MockProvider {
            response: "Low impact, no fix needed.".into(),
        };
        let mut f = make_finding();
        provider.deepen(&mut f).await.unwrap();
        assert_eq!(
            f.llm_analysis.as_deref(),
            Some("Low impact, no fix needed.")
        );
        assert!(f.suggested_fix.is_none());
        assert!(f.agent_prompt.is_none());
    }

    // --- company context section ------------------------------------------

    #[test]
    fn build_prompt_with_company_context() {
        let f = make_finding();
        let context = "We use SQLAlchemy ORM. Raw SQL is only in migrations.";
        let prompt = build_deepening_prompt(&f, context);
        assert!(prompt.contains("## Company Context"));
        assert!(prompt.contains("SQLAlchemy ORM"));
    }

    #[test]
    fn build_prompt_empty_company_context_omits_section() {
        let f = make_finding();
        let prompt = build_deepening_prompt(&f, "");
        assert!(!prompt.contains("Company Context"));
    }

    // --- load_knowledge ----------------------------------------------------

    #[test]
    fn load_knowledge_missing_file_returns_empty() {
        let tmp = tempfile::tempdir().unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("nonexistent.md".into()),
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert!(result.is_empty());
    }

    #[test]
    fn load_knowledge_valid_file_returns_content() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_path = tmp.path().join("knowledge.md");
        std::fs::write(&knowledge_path, "# Our Standards\nNo raw SQL.").unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("knowledge.md".into()),
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert_eq!(result, "# Our Standards\nNo raw SQL.");
    }

    #[test]
    fn load_knowledge_truncates_at_max_chars() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_path = tmp.path().join("big.md");
        std::fs::write(&knowledge_path, "x".repeat(10000)).unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("big.md".into()),
            max_knowledge_chars: 100,
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert_eq!(result.chars().count(), 100);
    }

    #[test]
    fn load_knowledge_truncates_multibyte_safely() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_path = tmp.path().join("utf8.md");
        // 100 two-byte chars; cap is measured in chars, not bytes.
        let content = "\u{00e9}".repeat(100);
        std::fs::write(&knowledge_path, &content).unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("utf8.md".into()),
            max_knowledge_chars: 50,
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert_eq!(result.chars().count(), 50);
        assert!(!result.is_empty());
    }

    #[test]
    fn load_knowledge_path_traversal_rejected() {
        let tmp = tempfile::tempdir().unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("../../etc/passwd".into()),
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert!(result.is_empty());
    }

    #[test]
    fn load_knowledge_binary_file_rejected() {
        let tmp = tempfile::tempdir().unwrap();
        let bin_path = tmp.path().join("binary.dat");
        // NUL bytes within the first 1 KiB trigger the binary-file guard.
        let mut content = vec![0u8; 512];
        content.extend_from_slice(b"some text after nulls");
        std::fs::write(&bin_path, &content).unwrap();
        let config = KnowledgeConfig {
            knowledge_file: Some("binary.dat".into()),
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert!(result.is_empty());
    }

    #[test]
    fn load_knowledge_none_file_returns_empty() {
        let tmp = tempfile::tempdir().unwrap();
        let config = KnowledgeConfig {
            knowledge_file: None,
            ..Default::default()
        };
        let result = load_knowledge(tmp.path(), &config);
        assert!(result.is_empty());
    }
}