ai/
multi_step_analysis.rs

1use serde::{Deserialize, Serialize};
2use serde_json::json;
3use async_openai::types::{ChatCompletionTool, ChatCompletionToolType, FunctionObjectArgs};
4use anyhow::Result;
5// TODO: Migrate to unified types from generation module
6
/// Outcome of analyzing a single file's diff; this is what `analyze_file` returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAnalysisResult {
  /// Count of added lines found in the diff.
  pub lines_added:   u32,
  /// Count of removed lines found in the diff.
  pub lines_removed: u32,
  /// Category label for the file (`analyze_file` fills this from `categorize_file`).
  pub file_category: String,
  /// Short human-readable description of the change.
  pub summary:       String
}
15
/// Per-file input record consumed by `calculate_impact_scores`.
///
/// Combines the file's identity (path and operation) with the results of a
/// previous analysis step.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDataForScoring {
  /// Path of the file the record describes.
  pub file_path:      String,
  /// Operation label. The scorer has dedicated weights for "added",
  /// "modified", "deleted", "renamed" and "binary"; any other value gets a
  /// default weight.
  pub operation_type: String,
  /// Number of lines added to the file.
  pub lines_added:    u32,
  /// Number of lines removed from the file.
  pub lines_removed:  u32,
  /// Category label. The scorer has dedicated weights for "source", "test",
  /// "config", "build", "docs" and "binary"; any other value gets a default
  /// weight.
  pub file_category:  String,
  /// Short description of the change; copied unchanged into the scored record.
  pub summary:        String
}
26
/// A `FileDataForScoring` record extended with its calculated impact score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileWithScore {
  /// Path of the file the record describes.
  pub file_path:      String,
  /// Operation label (for example "added", "modified" or "deleted").
  pub operation_type: String,
  /// Number of lines added to the file.
  pub lines_added:    u32,
  /// Number of lines removed from the file.
  pub lines_removed:  u32,
  /// Category label (for example "source", "test" or "config").
  pub file_category:  String,
  /// Short description of the change.
  pub summary:        String,
  /// Relative importance of the change. Scores produced by
  /// `calculate_impact_scores` never exceed 1.0.
  pub impact_score:   f32
}
38
/// Result of the scoring step, as returned by `calculate_impact_scores`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreResult {
  /// Scored files. `calculate_impact_scores` returns them ordered from the
  /// highest to the lowest `impact_score`.
  pub files_with_scores: Vec<FileWithScore>
}
44
/// Result of the commit message generation step, as returned by
/// `generate_commit_messages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateResult {
  /// Candidate commit messages. `generate_commit_messages` yields three
  /// candidates when at least one file is supplied and none otherwise.
  pub candidates: Vec<String>,
  /// Free-text explanation of which change was considered most important.
  pub reasoning:  String
}
51
52/// Creates the analyze function tool definition
53pub fn create_analyze_function_tool() -> Result<ChatCompletionTool> {
54  log::debug!("Creating analyze function tool");
55
56  let function = FunctionObjectArgs::default()
57    .name("analyze")
58    .description("Analyze a single file's changes from the git diff")
59    .parameters(json!({
60        "type": "object",
61        "properties": {
62            "file_path": {
63                "type": "string",
64                "description": "Relative path to the file"
65            },
66            "diff_content": {
67                "type": "string",
68                "description": "The git diff content for this specific file only"
69            },
70            "operation_type": {
71                "type": "string",
72                "enum": ["added", "modified", "deleted", "renamed", "binary"],
73                "description": "Type of operation performed on the file"
74            }
75        },
76        "required": ["file_path", "diff_content", "operation_type"]
77    }))
78    .build()?;
79
80  Ok(ChatCompletionTool { r#type: ChatCompletionToolType::Function, function })
81}
82
83/// Creates the score function tool definition
84pub fn create_score_function_tool() -> Result<ChatCompletionTool> {
85  log::debug!("Creating score function tool");
86
87  let function = FunctionObjectArgs::default()
88    .name("score")
89    .description("Calculate impact scores for all analyzed files")
90    .parameters(json!({
91        "type": "object",
92        "properties": {
93            "files_data": {
94                "type": "array",
95                "description": "Array of analyzed file data",
96                "items": {
97                    "type": "object",
98                    "properties": {
99                        "file_path": {
100                            "type": "string",
101                            "description": "Relative path to the file"
102                        },
103                        "operation_type": {
104                            "type": "string",
105                            "enum": ["added", "modified", "deleted", "renamed", "binary"],
106                            "description": "Type of operation performed on the file"
107                        },
108                        "lines_added": {
109                            "type": "integer",
110                            "description": "Number of lines added",
111                            "minimum": 0
112                        },
113                        "lines_removed": {
114                            "type": "integer",
115                            "description": "Number of lines removed",
116                            "minimum": 0
117                        },
118                        "file_category": {
119                            "type": "string",
120                            "enum": ["source", "test", "config", "docs", "binary", "build"],
121                            "description": "Category of the file"
122                        },
123                        "summary": {
124                            "type": "string",
125                            "description": "Brief description of changes"
126                        }
127                    },
128                    "required": ["file_path", "operation_type", "lines_added", "lines_removed", "file_category", "summary"]
129                }
130            }
131        },
132        "required": ["files_data"]
133    }))
134    .build()?;
135
136  Ok(ChatCompletionTool { r#type: ChatCompletionToolType::Function, function })
137}
138
139/// Creates the generate function tool definition
140pub fn create_generate_function_tool() -> Result<ChatCompletionTool> {
141  log::debug!("Creating generate function tool");
142
143  let function = FunctionObjectArgs::default()
144    .name("generate")
145    .description("Generate commit message candidates based on scored files")
146    .parameters(json!({
147        "type": "object",
148        "properties": {
149            "files_with_scores": {
150                "type": "array",
151                "description": "All files with calculated impact scores",
152                "items": {
153                    "type": "object",
154                    "properties": {
155                        "file_path": {
156                            "type": "string"
157                        },
158                        "operation_type": {
159                            "type": "string"
160                        },
161                        "lines_added": {
162                            "type": "integer"
163                        },
164                        "lines_removed": {
165                            "type": "integer"
166                        },
167                        "file_category": {
168                            "type": "string"
169                        },
170                        "summary": {
171                            "type": "string"
172                        },
173                        "impact_score": {
174                            "type": "number",
175                            "minimum": 0.0,
176                            "maximum": 1.0
177                        }
178                    },
179                    "required": ["file_path", "operation_type", "lines_added", "lines_removed", "file_category", "summary", "impact_score"]
180                }
181            },
182            "max_length": {
183                "type": "integer",
184                "description": "Maximum character length for commit message",
185                "default": 72
186            }
187        },
188        "required": ["files_with_scores"]
189    }))
190    .build()?;
191
192  Ok(ChatCompletionTool { r#type: ChatCompletionToolType::Function, function })
193}
194
195/// Analyzes a single file's changes
196pub fn analyze_file(file_path: &str, diff_content: &str, operation_type: &str) -> FileAnalysisResult {
197  log::debug!("Analyzing file: {file_path} ({operation_type})");
198
199  // Count lines added and removed
200  let mut lines_added = 0u32;
201  let mut lines_removed = 0u32;
202
203  for line in diff_content.lines() {
204    if line.starts_with('+') && !line.starts_with("+++") {
205      lines_added += 1;
206    } else if line.starts_with('-') && !line.starts_with("---") {
207      lines_removed += 1;
208    }
209  }
210
211  // Determine file category
212  let file_category = categorize_file(file_path);
213
214  // Generate summary based on diff content
215  let summary = generate_file_summary(file_path, diff_content, operation_type);
216
217  log::debug!("File analysis complete: +{lines_added} -{lines_removed} lines, category: {file_category}");
218
219  FileAnalysisResult { lines_added, lines_removed, file_category, summary }
220}
221
222/// Calculates impact scores for all files
223pub fn calculate_impact_scores(files_data: Vec<FileDataForScoring>) -> ScoreResult {
224  log::debug!("Calculating impact scores for {} files", files_data.len());
225
226  let mut files_with_scores = Vec::new();
227
228  for file_data in files_data {
229    let impact_score = calculate_single_impact_score(&file_data);
230
231    files_with_scores.push(FileWithScore {
232      file_path: file_data.file_path,
233      operation_type: file_data.operation_type,
234      lines_added: file_data.lines_added,
235      lines_removed: file_data.lines_removed,
236      file_category: file_data.file_category,
237      summary: file_data.summary,
238      impact_score
239    });
240  }
241
242  // Sort by impact score descending
243  files_with_scores.sort_by(|a, b| {
244    b.impact_score
245      .partial_cmp(&a.impact_score)
246      .unwrap_or(std::cmp::Ordering::Equal)
247  });
248
249  ScoreResult { files_with_scores }
250}
251
252/// Generates commit message candidates
253pub fn generate_commit_messages(files_with_scores: Vec<FileWithScore>, max_length: usize) -> GenerateResult {
254  log::debug!("Generating commit messages (max length: {max_length})");
255
256  // Find the highest impact changes
257  let primary_change = files_with_scores.first();
258  let mut candidates = Vec::new();
259
260  if let Some(primary) = primary_change {
261    // Generate different styles of commit messages
262
263    // Style 1: Action-focused
264    let action_msg = generate_action_message(primary, &files_with_scores, max_length);
265    candidates.push(action_msg);
266
267    // Style 2: Component-focused
268    let component_msg = generate_component_message(primary, &files_with_scores, max_length);
269    candidates.push(component_msg);
270
271    // Style 3: Impact-focused
272    let impact_msg = generate_impact_message(primary, &files_with_scores, max_length);
273    candidates.push(impact_msg);
274  }
275
276  let reasoning = generate_reasoning(&files_with_scores);
277
278  GenerateResult { candidates, reasoning }
279}
280
281// Helper functions
282
/// Classifies a file by its path into one of "test", "build", "docs",
/// "config", "binary" or "source".
///
/// Matching is case-insensitive and the first rule that applies wins:
///
/// 1. test: known test suffixes, or any directory named `test` / `tests`
/// 2. build: a dependency manifest (`package.json`, `Cargo.toml`, `go.mod`,
///    `requirements.txt`, `Gemfile`) in any directory, or a `.lock` file
/// 3. docs: `.md` / `.txt` / `.rst`, or any directory named `docs`
/// 4. config: common configuration extensions, a path containing `config`,
///    or a path containing `.github/`
/// 5. binary: common image / archive / PDF extensions
/// 6. source: everything else
///
/// Manifests are checked before docs so that `requirements.txt` is not
/// swallowed by the `.txt` rule. Directory rules match whole path segments,
/// so they also apply to directories at the root of a relative path.
fn categorize_file(file_path: &str) -> String {
  let path = file_path.to_lowercase();

  // Final path segment, used to recognise manifests at any depth.
  let file_name = path.rsplit('/').next().unwrap_or(path.as_str());

  // True when some directory segment (every segment but the last) equals `dir`.
  let in_dir = |dir: &str| path.split('/').rev().skip(1).any(|segment| segment == dir);
  let ends_with_any = |suffixes: &[&str]| suffixes.iter().any(|suffix| path.ends_with(*suffix));

  let is_manifest = matches!(file_name, "package.json" | "cargo.toml" | "go.mod" | "requirements.txt" | "gemfile");

  let category = if ends_with_any(&[".test.js", ".spec.js", "_test.go", "_test.rs"]) || in_dir("test") || in_dir("tests") {
    "test"
  } else if is_manifest || path.ends_with(".lock") {
    "build"
  } else if ends_with_any(&[".md", ".txt", ".rst"]) || in_dir("docs") {
    "docs"
  } else if ends_with_any(&[".yml", ".yaml", ".json", ".toml", ".ini", ".conf"])
    || path.contains("config")
    || path.contains(".github/")
  {
    "config"
  } else if ends_with_any(&[".png", ".jpg", ".gif", ".ico", ".pdf", ".zip"]) {
    "binary"
  } else {
    "source"
  };

  category.to_string()
}
326
327fn generate_file_summary(file_path: &str, _diff_content: &str, operation_type: &str) -> String {
328  // This is a simplified version - in practice, you'd analyze the diff content
329  // more thoroughly to generate meaningful summaries
330  match operation_type {
331    "added" => format!("New {} file added", categorize_file(file_path)),
332    "deleted" => format!("Removed {} file", categorize_file(file_path)),
333    "renamed" => "File renamed".to_string(),
334    "binary" => "Binary file updated".to_string(),
335    _ => "File modified".to_string()
336  }
337}
338
339fn calculate_single_impact_score(file_data: &FileDataForScoring) -> f32 {
340  let mut score = 0.0f32;
341
342  // Base score from operation type
343  score += match file_data.operation_type.as_str() {
344    "added" => 0.3,
345    "modified" => 0.2,
346    "deleted" => 0.25,
347    "renamed" => 0.1,
348    "binary" => 0.05,
349    _ => 0.1
350  };
351
352  // Score from file category
353  score += match file_data.file_category.as_str() {
354    "source" => 0.4,
355    "test" => 0.2,
356    "config" => 0.25,
357    "build" => 0.3,
358    "docs" => 0.1,
359    "binary" => 0.05,
360    _ => 0.1
361  };
362
363  // Score from lines changed (normalized)
364  let total_lines = file_data.lines_added + file_data.lines_removed;
365  let line_score = (total_lines as f32 / 100.0).min(0.3);
366  score += line_score;
367
368  score.min(1.0) // Cap at 1.0
369}
370
371fn generate_action_message(primary: &FileWithScore, _all_files: &[FileWithScore], max_length: usize) -> String {
372  let base = match primary.operation_type.as_str() {
373    "added" => "Add",
374    "modified" => "Update",
375    "deleted" => "Remove",
376    "renamed" => "Rename",
377    _ => "Change"
378  };
379
380  let component = extract_component_name(&primary.file_path);
381  let message = format!("{base} {component}");
382
383  if message.len() > max_length {
384    message.chars().take(max_length).collect()
385  } else {
386    message
387  }
388}
389
390fn generate_component_message(primary: &FileWithScore, _all_files: &[FileWithScore], max_length: usize) -> String {
391  let component = extract_component_name(&primary.file_path);
392  let action = match primary.operation_type.as_str() {
393    "added" => "implementation",
394    "modified" => "updates",
395    "deleted" => "removal",
396    _ => "changes"
397  };
398
399  let message = format!("{component}: {action}");
400
401  if message.len() > max_length {
402    message.chars().take(max_length).collect()
403  } else {
404    message
405  }
406}
407
408fn generate_impact_message(primary: &FileWithScore, all_files: &[FileWithScore], max_length: usize) -> String {
409  let impact_type = if all_files
410    .iter()
411    .any(|f| f.file_category == "source" && f.operation_type == "added")
412  {
413    "feature"
414  } else if all_files.iter().any(|f| f.file_category == "test") {
415    "test"
416  } else if all_files.iter().any(|f| f.file_category == "config") {
417    "configuration"
418  } else {
419    "update"
420  };
421
422  let component = extract_component_name(&primary.file_path);
423  let message = format!(
424    "{} {} for {}",
425    if impact_type == "feature" {
426      "New"
427    } else {
428      "Update"
429    },
430    impact_type,
431    component
432  );
433
434  if message.len() > max_length {
435    message.chars().take(max_length).collect()
436  } else {
437    message
438  }
439}
440
/// Derives a short component name from a file path.
///
/// Takes the last non-empty `/`-separated segment of the path and returns
/// its first non-empty `.`-separated part. So `src/auth/login.rs` gives
/// `login`, `Makefile` gives `Makefile`, and a dotfile such as `.gitignore`
/// gives `gitignore` instead of an empty string. When no usable name can be
/// found (for example an empty path), the placeholder `"component"` is
/// returned.
fn extract_component_name(file_path: &str) -> String {
  file_path
    .rsplit('/')
    // Skip empty segments produced by trailing or doubled slashes.
    .find(|segment| !segment.is_empty())
    // A leading dot yields an empty first part; move on to the next one.
    .and_then(|file_name| file_name.split('.').find(|part| !part.is_empty()))
    .unwrap_or("component")
    .to_string()
}
456
457fn generate_reasoning(files_with_scores: &[FileWithScore]) -> String {
458  if files_with_scores.is_empty() {
459    return "No files to analyze".to_string();
460  }
461
462  let primary = &files_with_scores[0];
463  let total_files = files_with_scores.len();
464  let total_lines: u32 = files_with_scores
465    .iter()
466    .map(|f| f.lines_added + f.lines_removed)
467    .sum();
468
469  format!(
470    "{} changes have highest impact ({:.2}) affecting {} functionality. \
471        Total {} files changed with {} lines modified.",
472    primary
473      .file_category
474      .chars()
475      .next()
476      .unwrap_or('u')
477      .to_uppercase()
478      .collect::<String>()
479      + primary.file_category.get(1..).unwrap_or(""),
480    primary.impact_score,
481    extract_component_name(&primary.file_path),
482    total_files,
483    total_lines
484  )
485}
486
#[cfg(test)]
mod tests {
  use super::*;

  /// One representative path per category.
  #[test]
  fn test_file_categorization() {
    assert_eq!(categorize_file("src/main.rs"), "source");
    assert_eq!(categorize_file("tests/integration_test.rs"), "test");
    assert_eq!(categorize_file("package.json"), "build");
    assert_eq!(categorize_file(".github/workflows/ci.yml"), "config");
    assert_eq!(categorize_file("README.md"), "docs");
    assert_eq!(categorize_file("logo.png"), "binary");
  }

  /// Pins the score of a known input instead of only checking the range,
  /// which every possible score satisfies.
  #[test]
  fn test_impact_score_calculation() {
    let file_data = FileDataForScoring {
      file_path:      "src/auth.rs".to_string(),
      operation_type: "modified".to_string(),
      lines_added:    50,
      lines_removed:  20,
      file_category:  "source".to_string(),
      summary:        "Updated authentication logic".to_string()
    };

    let score = calculate_single_impact_score(&file_data);
    assert!(score > 0.0 && score <= 1.0);
    // 0.2 (modified) + 0.4 (source) + 0.3 (70 changed lines, capped at 0.3)
    assert!((score - 0.9).abs() < 1e-5, "unexpected score: {score}");
  }
}
515}