use serde::{Deserialize, Serialize};
use serde_json::json;
use async_openai::types::{ChatCompletionTool, ChatCompletionToolType, FunctionObjectArgs};
use anyhow::Result;
/// Per-file analysis produced by `analyze_file`.
///
/// Field names and order are part of the serialized form (serde derives).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAnalysisResult {
/// Number of added lines counted in the file's diff.
pub lines_added: u32,
/// Number of removed lines counted in the file's diff.
pub lines_removed: u32,
/// Category label returned by `categorize_file` for the file path.
pub file_category: String,
/// Short description returned by `generate_file_summary`.
pub summary: String
}
/// One analyzed file as passed to `calculate_impact_scores`.
///
/// The fields match the item properties declared in the `score` tool schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDataForScoring {
/// Path of the file (the tool schema describes it as a relative path).
pub file_path: String,
/// Operation label; scoring gives specific weights to "added", "modified",
/// "deleted", "renamed" and "binary", and a default weight to anything else.
pub operation_type: String,
/// Number of lines added.
pub lines_added: u32,
/// Number of lines removed.
pub lines_removed: u32,
/// Category label; scoring gives specific weights to "source", "test",
/// "config", "build", "docs" and "binary", and a default weight otherwise.
pub file_category: String,
/// Brief description of the change.
pub summary: String
}
/// A file's analysis data together with its computed impact score.
///
/// Built by `calculate_impact_scores` from a `FileDataForScoring`; the first
/// six fields are copied over unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileWithScore {
/// Path of the file.
pub file_path: String,
/// Operation label (e.g. "added", "modified", "deleted", "renamed").
pub operation_type: String,
/// Number of lines added.
pub lines_added: u32,
/// Number of lines removed.
pub lines_removed: u32,
/// Category label of the file.
pub file_category: String,
/// Brief description of the change.
pub summary: String,
/// Score from `calculate_single_impact_score`; that function caps it at 1.0.
pub impact_score: f32
}
/// Output of `calculate_impact_scores`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreResult {
/// Scored files, sorted by `impact_score` from highest to lowest.
pub files_with_scores: Vec<FileWithScore>
}
/// Output of `generate_commit_messages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateResult {
/// Candidate commit messages; empty when no files were supplied.
pub candidates: Vec<String>,
/// Human-readable explanation produced by `generate_reasoning`.
pub reasoning: String
}
/// Builds the chat-completion tool definition for the `analyze` function.
///
/// The declared parameters are `file_path`, `diff_content` and
/// `operation_type`, all required strings; `operation_type` is restricted to
/// a fixed set of operation labels.
///
/// # Errors
///
/// Returns an error if the function-object builder fails.
pub fn create_analyze_function_tool() -> Result<ChatCompletionTool> {
    log::debug!("Creating analyze function tool");

    // Each property schema is built on its own, then assembled below.
    let file_path_schema = json!({
        "type": "string",
        "description": "Relative path to the file"
    });
    let diff_content_schema = json!({
        "type": "string",
        "description": "The git diff content for this specific file only"
    });
    let operation_type_schema = json!({
        "type": "string",
        "enum": ["added", "modified", "deleted", "renamed", "binary"],
        "description": "Type of operation performed on the file"
    });

    let parameters = json!({
        "type": "object",
        "properties": {
            "file_path": file_path_schema,
            "diff_content": diff_content_schema,
            "operation_type": operation_type_schema
        },
        "required": ["file_path", "diff_content", "operation_type"]
    });

    let function = FunctionObjectArgs::default()
        .name("analyze")
        .description("Analyze a single file's changes from the git diff")
        .parameters(parameters)
        .build()?;

    let tool = ChatCompletionTool {
        r#type: ChatCompletionToolType::Function,
        function,
    };
    Ok(tool)
}
/// Builds the chat-completion tool definition for the `score` function.
///
/// The single required parameter, `files_data`, is an array of objects whose
/// properties match the fields of `FileDataForScoring`.
///
/// # Errors
///
/// Returns an error if the function-object builder fails.
pub fn create_score_function_tool() -> Result<ChatCompletionTool> {
    log::debug!("Creating score function tool");

    // Schema of one element of the `files_data` array.
    let file_entry_schema = json!({
        "type": "object",
        "properties": {
            "file_path": {
                "type": "string",
                "description": "Relative path to the file"
            },
            "operation_type": {
                "type": "string",
                "enum": ["added", "modified", "deleted", "renamed", "binary"],
                "description": "Type of operation performed on the file"
            },
            "lines_added": {
                "type": "integer",
                "description": "Number of lines added",
                "minimum": 0
            },
            "lines_removed": {
                "type": "integer",
                "description": "Number of lines removed",
                "minimum": 0
            },
            "file_category": {
                "type": "string",
                "enum": ["source", "test", "config", "docs", "binary", "build"],
                "description": "Category of the file"
            },
            "summary": {
                "type": "string",
                "description": "Brief description of changes"
            }
        },
        "required": ["file_path", "operation_type", "lines_added", "lines_removed", "file_category", "summary"]
    });

    let parameters = json!({
        "type": "object",
        "properties": {
            "files_data": {
                "type": "array",
                "description": "Array of analyzed file data",
                "items": file_entry_schema
            }
        },
        "required": ["files_data"]
    });

    let function = FunctionObjectArgs::default()
        .name("score")
        .description("Calculate impact scores for all analyzed files")
        .parameters(parameters)
        .build()?;

    let tool = ChatCompletionTool {
        r#type: ChatCompletionToolType::Function,
        function,
    };
    Ok(tool)
}
/// Builds the chat-completion tool definition for the `generate` function.
///
/// Parameters: the required `files_with_scores` array (objects matching the
/// fields of `FileWithScore`, with `impact_score` bounded to 0.0..=1.0) and an
/// optional integer `max_length` whose schema default is 72.
///
/// # Errors
///
/// Returns an error if the function-object builder fails.
pub fn create_generate_function_tool() -> Result<ChatCompletionTool> {
    log::debug!("Creating generate function tool");

    // Schema of one element of the `files_with_scores` array.
    let scored_file_schema = json!({
        "type": "object",
        "properties": {
            "file_path": {
                "type": "string"
            },
            "operation_type": {
                "type": "string"
            },
            "lines_added": {
                "type": "integer"
            },
            "lines_removed": {
                "type": "integer"
            },
            "file_category": {
                "type": "string"
            },
            "summary": {
                "type": "string"
            },
            "impact_score": {
                "type": "number",
                "minimum": 0.0,
                "maximum": 1.0
            }
        },
        "required": ["file_path", "operation_type", "lines_added", "lines_removed", "file_category", "summary", "impact_score"]
    });
    let max_length_schema = json!({
        "type": "integer",
        "description": "Maximum character length for commit message",
        "default": 72
    });

    let parameters = json!({
        "type": "object",
        "properties": {
            "files_with_scores": {
                "type": "array",
                "description": "All files with calculated impact scores",
                "items": scored_file_schema
            },
            "max_length": max_length_schema
        },
        "required": ["files_with_scores"]
    });

    let function = FunctionObjectArgs::default()
        .name("generate")
        .description("Generate commit message candidates based on scored files")
        .parameters(parameters)
        .build()?;

    let tool = ChatCompletionTool {
        r#type: ChatCompletionToolType::Function,
        function,
    };
    Ok(tool)
}
/// Analyzes the diff of a single file.
///
/// Counts added and removed lines in `diff_content`, derives the file's
/// category from `file_path`, and builds a short summary from
/// `operation_type`.
///
/// # Arguments
///
/// * `file_path` - path of the file the diff belongs to.
/// * `diff_content` - unified diff text for this file.
/// * `operation_type` - operation label such as "added" or "modified".
pub fn analyze_file(file_path: &str, diff_content: &str, operation_type: &str) -> FileAnalysisResult {
    log::debug!("Analyzing file: {file_path} ({operation_type})");
    let (lines_added, lines_removed) = count_changed_lines(diff_content);
    let file_category = categorize_file(file_path);
    let summary = generate_file_summary(file_path, diff_content, operation_type);
    log::debug!("File analysis complete: +{lines_added} -{lines_removed} lines, category: {file_category}");
    FileAnalysisResult { lines_added, lines_removed, file_category, summary }
}

/// Counts `(added, removed)` lines in a unified diff.
///
/// `+++` / `---` file headers only occur before the first `@@` hunk header of
/// a file section, so once a hunk has started every `+`/`-` line is real
/// content, including lines whose content itself begins with `++` or `--`.
/// Before any hunk header has been seen (for example a diff fragment without
/// `@@` lines) lines starting with `+++` / `---` are skipped as headers.
fn count_changed_lines(diff_content: &str) -> (u32, u32) {
    let mut added = 0u32;
    let mut removed = 0u32;
    let mut in_hunk = false;
    for line in diff_content.lines() {
        if line.starts_with("@@") {
            in_hunk = true;
        } else if line.starts_with("diff --git ") {
            // A new file section begins; its headers come before its hunks.
            in_hunk = false;
        } else if line.starts_with('+') {
            if in_hunk || !line.starts_with("+++") {
                added += 1;
            }
        } else if line.starts_with('-') && (in_hunk || !line.starts_with("---")) {
            removed += 1;
        }
    }
    (added, removed)
}
pub fn calculate_impact_scores(files_data: Vec<FileDataForScoring>) -> ScoreResult {
log::debug!("Calculating impact scores for {} files", files_data.len());
let mut files_with_scores = Vec::new();
for file_data in files_data {
let impact_score = calculate_single_impact_score(&file_data);
files_with_scores.push(FileWithScore {
file_path: file_data.file_path,
operation_type: file_data.operation_type,
lines_added: file_data.lines_added,
lines_removed: file_data.lines_removed,
file_category: file_data.file_category,
summary: file_data.summary,
impact_score
});
}
files_with_scores.sort_by(|a, b| {
b.impact_score
.partial_cmp(&a.impact_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
ScoreResult { files_with_scores }
}
/// Produces commit-message candidates plus a reasoning string.
///
/// The first element of `files_with_scores` is treated as the primary change.
/// When it exists, three candidates are generated in order: action-style,
/// component-style and impact-style. With no files the candidate list is
/// empty. `max_length` is forwarded to each message generator.
pub fn generate_commit_messages(files_with_scores: Vec<FileWithScore>, max_length: usize) -> GenerateResult {
    log::debug!("Generating commit messages (max length: {max_length})");

    let candidates = match files_with_scores.first() {
        Some(top) => vec![
            generate_action_message(top, &files_with_scores, max_length),
            generate_component_message(top, &files_with_scores, max_length),
            generate_impact_message(top, &files_with_scores, max_length),
        ],
        None => Vec::new(),
    };

    GenerateResult {
        candidates,
        reasoning: generate_reasoning(&files_with_scores),
    }
}
/// Classifies a file path as "test", "build", "docs", "config", "binary" or
/// "source".
///
/// Matching is case-insensitive and the first matching rule wins, in this
/// order:
///
/// 1. test: known test suffixes, or any `test` / `tests` directory component;
/// 2. build: a package manifest file name in any directory, or a `.lock` file;
/// 3. docs: `.md` / `.txt` / `.rst`, or any `docs` directory component;
/// 4. config: common config extensions, "config" anywhere in the path, or a
///    `.github/` directory;
/// 5. binary: common image/archive/PDF extensions;
/// 6. otherwise "source".
///
/// Build manifests are checked before docs so that `requirements.txt` is not
/// swallowed by the `.txt` rule.
fn categorize_file(file_path: &str) -> String {
    const TEST_SUFFIXES: [&str; 4] = [".test.js", ".spec.js", "_test.go", "_test.rs"];
    const BUILD_MANIFESTS: [&str; 5] =
        ["package.json", "cargo.toml", "go.mod", "requirements.txt", "gemfile"];
    const DOC_SUFFIXES: [&str; 3] = [".md", ".txt", ".rst"];
    const CONFIG_SUFFIXES: [&str; 6] = [".yml", ".yaml", ".json", ".toml", ".ini", ".conf"];
    const BINARY_SUFFIXES: [&str; 6] = [".png", ".jpg", ".gif", ".ico", ".pdf", ".zip"];

    let path = file_path.to_lowercase();
    let file_name = path.rsplit('/').next().unwrap_or(path.as_str());

    let has_suffix = |suffixes: &[&str]| suffixes.iter().any(|s| path.ends_with(s));
    // True when `dir` is a directory component of the path; the final
    // component (the file name) is skipped. Unlike a "/dir/" substring test,
    // this also matches a directory at the start of a relative path.
    let in_dir = |dir: &str| path.split('/').rev().skip(1).any(|segment| segment == dir);

    let category = if has_suffix(&TEST_SUFFIXES) || in_dir("test") || in_dir("tests") {
        "test"
    } else if BUILD_MANIFESTS.iter().any(|manifest| *manifest == file_name)
        || path.ends_with(".lock")
    {
        "build"
    } else if has_suffix(&DOC_SUFFIXES) || in_dir("docs") {
        "docs"
    } else if has_suffix(&CONFIG_SUFFIXES) || path.contains("config") || path.contains(".github/") {
        "config"
    } else if has_suffix(&BINARY_SUFFIXES) {
        "binary"
    } else {
        "source"
    };
    category.to_string()
}
/// Returns a one-line summary for a file based on its operation type.
///
/// "added" and "deleted" mention the file's category; "renamed" and "binary"
/// use fixed text; every other operation type yields "File modified". The
/// diff content is currently unused.
fn generate_file_summary(file_path: &str, _diff_content: &str, operation_type: &str) -> String {
    // Only these two operations need the category, so it is looked up lazily.
    if operation_type == "added" {
        return format!("New {} file added", categorize_file(file_path));
    }
    if operation_type == "deleted" {
        return format!("Removed {} file", categorize_file(file_path));
    }

    let fixed_text = match operation_type {
        "renamed" => "File renamed",
        "binary" => "Binary file updated",
        _ => "File modified",
    };
    fixed_text.to_string()
}
/// Computes the impact score of one file, at most 1.0.
///
/// The score is the sum of three weights:
///
/// * an operation weight (0.05 to 0.3, 0.1 for unknown operations);
/// * a category weight (0.05 to 0.4, 0.1 for unknown categories);
/// * a size weight of one hundredth of the changed-line count, capped at 0.3.
fn calculate_single_impact_score(file_data: &FileDataForScoring) -> f32 {
    let operation_weight: f32 = match file_data.operation_type.as_str() {
        "added" => 0.3,
        "modified" => 0.2,
        "deleted" => 0.25,
        "renamed" => 0.1,
        "binary" => 0.05,
        _ => 0.1,
    };
    let category_weight: f32 = match file_data.file_category.as_str() {
        "source" => 0.4,
        "test" => 0.2,
        "config" => 0.25,
        "build" => 0.3,
        "docs" => 0.1,
        "binary" => 0.05,
        _ => 0.1,
    };

    // The counts arrive from deserialized input, so a plain `+` could overflow
    // u32 (panic in debug builds, wrap in release). Saturating is safe because
    // the size weight is capped at 0.3 anyway.
    let total_lines = file_data.lines_added.saturating_add(file_data.lines_removed);
    let size_weight = (total_lines as f32 / 100.0).min(0.3);

    (operation_weight + category_weight + size_weight).min(1.0)
}
/// Builds a "<Verb> <component>" message for the primary file.
///
/// The verb comes from the operation type ("Change" when unrecognized) and
/// the component from the file path. A message longer than `max_length` bytes
/// is cut down to its first `max_length` characters.
fn generate_action_message(primary: &FileWithScore, _all_files: &[FileWithScore], max_length: usize) -> String {
    const VERBS: [(&str, &str); 4] = [
        ("added", "Add"),
        ("modified", "Update"),
        ("deleted", "Remove"),
        ("renamed", "Rename"),
    ];

    let operation = primary.operation_type.as_str();
    let verb = VERBS
        .iter()
        .find(|(known, _)| *known == operation)
        .map_or("Change", |(_, verb)| *verb);

    let message = format!("{verb} {}", extract_component_name(&primary.file_path));
    if message.len() <= max_length {
        return message;
    }
    message.chars().take(max_length).collect()
}
/// Builds a "<component>: <action>" message for the primary file.
///
/// The action noun comes from the operation type ("changes" when
/// unrecognized). A message longer than `max_length` bytes is cut down to its
/// first `max_length` characters.
fn generate_component_message(primary: &FileWithScore, _all_files: &[FileWithScore], max_length: usize) -> String {
    let component = extract_component_name(&primary.file_path);

    let operation = primary.operation_type.as_str();
    let action = if operation == "added" {
        "implementation"
    } else if operation == "modified" {
        "updates"
    } else if operation == "deleted" {
        "removal"
    } else {
        "changes"
    };

    let message = format!("{component}: {action}");
    if message.len() <= max_length {
        return message;
    }
    message.chars().take(max_length).collect()
}
/// Builds a "<New|Update> <impact> for <component>" message.
///
/// The impact type is chosen from the whole change set, first match wins:
/// an added source file gives "feature"; otherwise any test file gives
/// "test"; otherwise any config file gives "configuration"; otherwise
/// "update". Only "feature" uses the "New" prefix. A message longer than
/// `max_length` bytes is cut down to its first `max_length` characters.
fn generate_impact_message(primary: &FileWithScore, all_files: &[FileWithScore], max_length: usize) -> String {
    let adds_source = || {
        all_files
            .iter()
            .any(|f| f.file_category == "source" && f.operation_type == "added")
    };
    let touches = |category: &str| all_files.iter().any(|f| f.file_category == category);

    let (prefix, impact_type) = if adds_source() {
        ("New", "feature")
    } else if touches("test") {
        ("Update", "test")
    } else if touches("config") {
        ("Update", "configuration")
    } else {
        ("Update", "update")
    };

    let message = format!(
        "{prefix} {impact_type} for {}",
        extract_component_name(&primary.file_path)
    );
    if message.len() <= max_length {
        return message;
    }
    message.chars().take(max_length).collect()
}
/// Derives a short component name from a file path.
///
/// Takes the last non-empty `/`-separated segment and returns the part before
/// its first `.` (`src/auth.rs` gives `auth`, `foo.test.js` gives `foo`).
///
/// Edge cases:
///
/// * dotfiles such as `.gitignore` have nothing before the first dot, so the
///   whole file name is returned instead of an empty string;
/// * names without a dot are returned unchanged;
/// * a path with no usable segment (empty, or only slashes) yields
///   `"component"`.
fn extract_component_name(file_path: &str) -> String {
    let filename = match file_path.rsplit('/').find(|segment| !segment.is_empty()) {
        Some(name) => name,
        None => return "component".to_string(),
    };

    match filename.split('.').next() {
        Some(stem) if !stem.is_empty() => stem.to_string(),
        // Leading dot: keep the full name rather than an empty stem.
        _ => filename.to_string(),
    }
}
/// Explains which change has the highest impact and summarizes totals.
///
/// Returns "No files to analyze" for an empty slice. Otherwise the first
/// element is treated as the primary change: the text reports its capitalized
/// category, its score to two decimals, its component name, the number of
/// files, and the total of added plus removed lines across all files.
fn generate_reasoning(files_with_scores: &[FileWithScore]) -> String {
    let primary = match files_with_scores.first() {
        Some(file) => file,
        None => return "No files to analyze".to_string(),
    };

    // Summed in u64 so large per-file counts cannot overflow the total.
    let total_lines: u64 = files_with_scores
        .iter()
        .map(|f| u64::from(f.lines_added) + u64::from(f.lines_removed))
        .sum();

    // Capitalize by characters rather than by byte offset, so a multi-byte
    // first character does not cause the rest of the label to be dropped.
    let mut category_chars = primary.file_category.chars();
    let category_label: String = match category_chars.next() {
        Some(first) => first.to_uppercase().chain(category_chars).collect(),
        // An empty category keeps the historical "U" placeholder.
        None => "U".to_string(),
    };

    format!(
        "{} changes have highest impact ({:.2}) affecting {} functionality. \
         Total {} files changed with {} lines modified.",
        category_label,
        primary.impact_score,
        extract_component_name(&primary.file_path),
        files_with_scores.len(),
        total_lines
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every sample path must map to its expected category label.
    #[test]
    fn test_file_categorization() {
        let expectations = [
            ("src/main.rs", "source"),
            ("tests/integration_test.rs", "test"),
            ("package.json", "build"),
            (".github/workflows/ci.yml", "config"),
            ("README.md", "docs"),
            ("logo.png", "binary"),
        ];
        for &(path, expected) in expectations.iter() {
            assert_eq!(categorize_file(path), expected);
        }
    }

    /// A typical modified source file must score within (0.0, 1.0].
    #[test]
    fn test_impact_score_calculation() {
        let sample = FileDataForScoring {
            file_path: "src/auth.rs".to_string(),
            operation_type: "modified".to_string(),
            lines_added: 50,
            lines_removed: 20,
            file_category: "source".to_string(),
            summary: "Updated authentication logic".to_string(),
        };
        let score = calculate_single_impact_score(&sample);
        assert!(score > 0.0);
        assert!(score <= 1.0);
    }
}