ralph_workflow/phases/
commit.rs

1//! Commit message generation phase.
2//!
3//! This module handles automated commit message generation using the standard
4//! agent pipeline with fallback support. It replaces the custom implementation
5//! in repo.rs that lacked proper logging and fallback handling.
6//!
7//! The phase:
8//! 1. Takes a git diff as input
9//! 2. Runs the commit agent with the diff via the standard pipeline
10//! 3. Extracts the commit message from agent output
11//! 4. Returns the generated message for use by the caller
12
13use super::commit_logging::{
14    AttemptOutcome, CommitAttemptLog, CommitLogSession, ExtractionAttempt,
15};
16use super::context::PhaseContext;
17use crate::agents::{AgentRegistry, AgentRole};
18use crate::checkpoint::execution_history::{ExecutionStep, StepOutcome};
19use crate::files::llm_output_extraction::{
20    archive_xml_file_with_workspace, preprocess_raw_content, try_extract_from_file_with_workspace,
21    try_extract_xml_commit_with_trace, xml_paths, CommitExtractionResult,
22};
23use crate::git_helpers::{git_add_all, git_commit, CommitResultFallback};
24use crate::logger::Logger;
25use crate::pipeline::PipelineRuntime;
26use crate::prompts::{
27    get_stored_or_generate_prompt, prompt_generate_commit_message_with_diff_with_context,
28    prompt_simplified_commit_with_context, prompt_xsd_retry_with_context,
29};
30use std::collections::HashMap;
31use std::fmt;
32use std::path::Path;
33
34/// Preview a commit message for display (first line, truncated if needed).
35fn preview_commit_message(msg: &str) -> String {
36    let first_line = msg.lines().next().unwrap_or(msg);
37    if first_line.len() > 60 {
38        format!("{}...", &first_line[..60])
39    } else {
40        first_line.to_string()
41    }
42}
43
44/// Maximum safe prompt size in bytes before pre-truncation.
45///
46/// This is a conservative limit to prevent agents from failing with "prompt too long"
47/// errors. Different agents have different token limits:
48/// - GLM: ~100KB effective limit
49/// - Claude CCS: ~300KB effective limit
50/// - Others: vary by model
51///
52/// We use 200KB as a safe middle ground that works for most agents while still
53/// allowing substantial diffs to be processed without truncation.
54const MAX_SAFE_PROMPT_SIZE: usize = 200_000;
55
56/// Absolute last resort fallback commit message.
57///
58/// This is used ONLY when all other methods fail:
59/// - All 8 prompt variants exhausted
60/// - All agents in fallback chain exhausted
61/// - All truncation stages failed
62/// - Emergency no-diff prompt failed
63/// - Deterministic fallback from diff failed
64///
65/// This ensures the commit process NEVER fails completely.
66pub(crate) const HARDCODED_FALLBACK_COMMIT: &str = "chore: automated commit";
67
68/// Get the maximum safe prompt size for a specific agent.
69///
70/// Different agents have different token limits. This function returns a
71/// conservative max size for the given agent to prevent "prompt too long" errors.
72///
73/// # Arguments
74///
75/// * `commit_agent` - The commit agent command string
76///
77/// # Returns
78///
79/// Maximum safe prompt size in bytes
80fn max_prompt_size_for_agent(commit_agent: &str) -> usize {
81    let agent_lower = commit_agent.to_lowercase();
82
83    // GLM and similar agents have smaller effective limits
84    if agent_lower.contains("glm")
85        || agent_lower.contains("zhipuai")
86        || agent_lower.contains("zai")
87        || agent_lower.contains("qwen")
88        || agent_lower.contains("deepseek")
89    {
90        100_000 // 100KB for GLM-like agents
91    } else if agent_lower.contains("claude")
92        || agent_lower.contains("ccs")
93        || agent_lower.contains("anthropic")
94    {
95        300_000 // 300KB for Claude-based agents
96    } else {
97        MAX_SAFE_PROMPT_SIZE // Default 200KB
98    }
99}
100
101/// Retry strategy for commit message generation.
102///
103/// Tracks which stage of re-prompting we're in, allowing for progressive
104/// degradation from detailed prompts to minimal ones before falling back
105/// to the next agent in the chain.
106///
107/// With XSD validation, we now have two strategies. Each strategy supports
108/// up to 5 in-session retries with validation feedback.
109///
110/// The XSD retry mechanism is used internally for in-session retries when
111/// validation fails, not as a separate stage.
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113enum CommitRetryStrategy {
114    /// First attempt with normal XML prompt
115    Normal,
116    /// Simplified XML prompt - more direct instructions
117    Simplified,
118}
119
120impl CommitRetryStrategy {
121    /// Get the description of this retry stage for logging
122    const fn description(self) -> &'static str {
123        match self {
124            Self::Normal => "normal XML prompt",
125            Self::Simplified => "simplified XML prompt",
126        }
127    }
128
129    /// Get the next retry strategy, or None if this is the last stage
130    const fn next(self) -> Option<Self> {
131        match self {
132            Self::Normal => Some(Self::Simplified),
133            Self::Simplified => None,
134        }
135    }
136
137    /// Get the 1-based stage number for this strategy
138    const fn stage_number(self) -> usize {
139        match self {
140            Self::Normal => 1,
141            Self::Simplified => 2,
142        }
143    }
144
145    /// Get the total number of retry stages
146    const fn total_stages() -> usize {
147        2 // Normal + Simplified
148    }
149
150    /// Get the maximum number of in-session retries for this strategy
151    const fn max_session_retries(self) -> usize {
152        match self {
153            Self::Normal => crate::reducer::state::MAX_VALIDATION_RETRY_ATTEMPTS as usize,
154            Self::Simplified => crate::reducer::state::MAX_VALIDATION_RETRY_ATTEMPTS as usize,
155        }
156    }
157}
158
159impl fmt::Display for CommitRetryStrategy {
160    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161        write!(f, "{}", self.description())
162    }
163}
164
165/// Result of commit message generation.
166pub struct CommitMessageResult {
167    /// The generated commit message (may be empty on failure)
168    pub message: String,
169    /// Whether the generation was successful
170    pub success: bool,
171    /// Path to the agent log file for debugging (currently unused but kept for API compatibility)
172    pub _log_path: String,
173    /// Prompts that were generated during this commit generation (key -> prompt)
174    /// This is used for capturing prompts in checkpoints for deterministic resume
175    pub generated_prompts: std::collections::HashMap<String, String>,
176}
177
178/// Truncate diff if it's too large for agents with small context windows.
179///
180/// This is a defensive measure when agents report "prompt too long" errors.
181/// Returns a truncated diff with a summary of omitted content.
182///
183/// # Semantic Awareness
184///
185/// The improved truncation:
186/// 1. Preserves file structure - truncates at file boundaries (after `diff --git` blocks)
187/// 2. Prioritizes important files - keeps files from `src/` over `tests/`, `.md` files, etc.
188/// 3. Preserves last N files - shows what changed at the end
189/// 4. Adds a summary header - includes "First M files shown, N files truncated"
190fn truncate_diff_if_large(diff: &str, max_size: usize) -> String {
191    if diff.len() <= max_size {
192        return diff.to_string();
193    }
194
195    // Parse the diff into individual file blocks
196    let mut files: Vec<DiffFile> = Vec::new();
197    let mut current_file = DiffFile::default();
198    let mut in_file = false;
199
200    for line in diff.lines() {
201        if line.starts_with("diff --git ") {
202            // Save previous file if any
203            if in_file && !current_file.lines.is_empty() {
204                files.push(std::mem::take(&mut current_file));
205            }
206            in_file = true;
207            current_file.lines.push(line.to_string());
208
209            // Extract and prioritize the file path
210            if let Some(path) = line.split(" b/").nth(1) {
211                current_file.path = path.to_string();
212                current_file.priority = prioritize_file_path(path);
213            }
214        } else if in_file {
215            current_file.lines.push(line.to_string());
216        }
217    }
218
219    // Don't forget the last file
220    if in_file && !current_file.lines.is_empty() {
221        files.push(current_file);
222    }
223
224    let total_files = files.len();
225
226    // Sort files by priority (highest first) to keep important files
227    files.sort_by_key(|f| std::cmp::Reverse(f.priority));
228
229    // Greedily select files that fit within max_size
230    let mut selected_files = Vec::new();
231    let mut current_size = 0;
232
233    for file in files {
234        let file_size: usize = file.lines.iter().map(|l| l.len() + 1).sum(); // +1 for newline
235
236        if current_size + file_size <= max_size {
237            current_size += file_size;
238            selected_files.push(file);
239        } else if current_size > 0 {
240            // We have at least one file and this one would exceed the limit
241            // Stop adding more files
242            break;
243        } else {
244            // Even the first (highest priority) file is too large
245            // Take at least the first part of it
246            let truncated_lines = truncate_lines_to_fit(&file.lines, max_size);
247            selected_files.push(DiffFile {
248                path: file.path,
249                priority: file.priority,
250                lines: truncated_lines,
251            });
252            break;
253        }
254    }
255
256    let selected_count = selected_files.len();
257    let omitted_count = total_files.saturating_sub(selected_count);
258
259    // Build the truncated diff
260    let mut result = String::new();
261
262    // Add summary header at the top
263    if omitted_count > 0 {
264        use std::fmt::Write;
265        let _ = write!(
266            result,
267            "[Diff truncated: Showing first {selected_count} of {total_files} files. {omitted_count} files omitted due to size constraints.]\n\n"
268        );
269    }
270
271    for file in selected_files {
272        for line in &file.lines {
273            result.push_str(line);
274            result.push('\n');
275        }
276    }
277
278    result
279}
280
281/// Represents a single file's diff chunk.
282#[derive(Debug, Default, Clone)]
283struct DiffFile {
284    /// File path (extracted from diff header)
285    path: String,
286    /// Priority for selection (higher = more important)
287    priority: i32,
288    /// Lines in this file's diff
289    lines: Vec<String>,
290}
291
292/// Assign a priority score to a file path for truncation selection.
293///
294/// Higher priority files are kept first when truncating:
295/// - src/*.rs: +100 (source code is most important)
296/// - src/*: +80 (other src files)
297/// - tests/*: +40 (tests are important but secondary)
298/// - Cargo.toml, package.json, etc.: +60 (config files)
299/// - docs/*, *.md: +20 (docs are least important)
300/// - Other: +50 (default)
301fn prioritize_file_path(path: &str) -> i32 {
302    use std::path::Path;
303    let path_lower = path.to_lowercase();
304
305    // Helper function for case-insensitive extension check
306    let has_ext = |ext: &str| -> bool {
307        Path::new(path)
308            .extension()
309            .and_then(std::ffi::OsStr::to_str)
310            .is_some_and(|e| e.eq_ignore_ascii_case(ext))
311    };
312
313    // Helper function for case-insensitive file extension check on path_lower
314    let has_ext_lower = |ext: &str| -> bool {
315        Path::new(&path_lower)
316            .extension()
317            .and_then(std::ffi::OsStr::to_str)
318            .is_some_and(|e| e.eq_ignore_ascii_case(ext))
319    };
320
321    // Source code files (highest priority)
322    if path_lower.contains("src/") && has_ext_lower("rs") {
323        100
324    } else if path_lower.contains("src/") {
325        80
326    }
327    // Test files
328    else if path_lower.contains("test") {
329        40
330    }
331    // Config files - use case-insensitive extension check
332    else if has_ext("toml")
333        || has_ext("json")
334        || path_lower.ends_with("cargo.toml")
335        || path_lower.ends_with("package.json")
336        || path_lower.ends_with("tsconfig.json")
337    {
338        60
339    }
340    // Documentation files (lowest priority)
341    else if path_lower.contains("doc") || has_ext("md") {
342        20
343    }
344    // Default priority
345    else {
346        50
347    }
348}
349
350/// Truncate a slice of lines to fit within a maximum size.
351///
352/// This is a fallback for when even a single file is too large.
353/// Returns as many complete lines as will fit.
354fn truncate_lines_to_fit(lines: &[String], max_size: usize) -> Vec<String> {
355    let mut result = Vec::new();
356    let mut current_size = 0;
357
358    for line in lines {
359        let line_size = line.len() + 1; // +1 for newline
360        if current_size + line_size <= max_size {
361            current_size += line_size;
362            result.push(line.clone());
363        } else {
364            break;
365        }
366    }
367
368    // Add truncation marker to the last line
369    if let Some(last) = result.last_mut() {
370        last.push_str(" [truncated...]");
371    }
372
373    result
374}
375
376/// Check and pre-truncate diff if it exceeds agent's token limits.
377///
378/// Returns the (possibly truncated) diff and whether truncation occurred.
379fn check_and_pre_truncate_diff(
380    diff: &str,
381    commit_agent: &str,
382    runtime: &PipelineRuntime,
383) -> (String, bool) {
384    let max_size = max_prompt_size_for_agent(commit_agent);
385    if diff.len() > max_size {
386        runtime.logger.warn(&format!(
387            "Diff size ({} KB) exceeds agent limit ({} KB). Pre-truncating to avoid token errors.",
388            diff.len() / 1024,
389            max_size / 1024
390        ));
391        (truncate_diff_if_large(diff, max_size), true)
392    } else {
393        runtime.logger.info(&format!(
394            "Diff size ({} KB) is within safe limit ({} KB).",
395            diff.len() / 1024,
396            max_size / 1024
397        ));
398        (diff.to_string(), false)
399    }
400}
401
402/// Generate the appropriate prompt for the current retry strategy.
403///
404/// For XSD retry, the xsd_error parameter is used to provide feedback to the agent.
405/// Note: XSD retry is handled internally within the session, not as a separate stage.
406///
407/// For hardened resume, this function uses stored prompts from checkpoint when available
408/// to ensure deterministic behavior on resume.
409fn generate_prompt_for_strategy(
410    strategy: CommitRetryStrategy,
411    working_diff: &str,
412    template_context: &crate::prompts::TemplateContext,
413    workspace: &dyn crate::workspace::Workspace,
414    xsd_error: Option<&str>,
415    prompt_history: &HashMap<String, String>,
416    prompt_key: &str,
417) -> (String, bool) {
418    // Use stored_or_generate pattern for hardened resume
419    // The key identifies which prompt variant this is (strategy + retry state)
420    let full_prompt_key = if xsd_error.is_some() {
421        format!("{}_xsd_retry", prompt_key)
422    } else {
423        prompt_key.to_string()
424    };
425
426    let (prompt, was_replayed) =
427        get_stored_or_generate_prompt(&full_prompt_key, prompt_history, || match strategy {
428            CommitRetryStrategy::Normal => {
429                if let Some(error_msg) = xsd_error {
430                    // In-session XSD retry with error feedback
431                    prompt_xsd_retry_with_context(
432                        template_context,
433                        working_diff,
434                        error_msg,
435                        workspace,
436                    )
437                } else {
438                    // First attempt with normal XML prompt
439                    prompt_generate_commit_message_with_diff_with_context(
440                        template_context,
441                        working_diff,
442                        workspace,
443                    )
444                }
445            }
446            CommitRetryStrategy::Simplified => {
447                if let Some(error_msg) = xsd_error {
448                    // In-session XSD retry with error feedback
449                    prompt_xsd_retry_with_context(
450                        template_context,
451                        working_diff,
452                        error_msg,
453                        workspace,
454                    )
455                } else {
456                    // Simplified XML prompt
457                    prompt_simplified_commit_with_context(template_context, working_diff)
458                }
459            }
460        });
461
462    (prompt, was_replayed)
463}
464
465/// Log the current attempt with prompt size information.
466fn log_commit_attempt(
467    strategy: CommitRetryStrategy,
468    prompt_size_kb: usize,
469    commit_agent: &str,
470    runtime: &PipelineRuntime,
471) {
472    if strategy == CommitRetryStrategy::Normal {
473        runtime.logger.info(&format!(
474            "Attempt 1/{}: Using {} (prompt size: {} KB, agent: {})",
475            CommitRetryStrategy::total_stages(),
476            strategy,
477            prompt_size_kb,
478            commit_agent
479        ));
480    } else {
481        runtime.logger.warn(&format!(
482            "Attempt {}/{}: Re-prompting with {} (prompt size: {} KB, agent: {})...",
483            strategy as usize + 1,
484            CommitRetryStrategy::total_stages(),
485            strategy,
486            prompt_size_kb,
487            commit_agent
488        ));
489    }
490}
491
492/// Handle the extraction result from a commit attempt.
493///
494/// Returns `Some(result)` if we should return early (success),
495/// or `None` if we should continue to the next strategy.
496///
497/// With XSD validation handling everything, we only check:
498/// - Success: Valid commit message extracted
499/// - Failure: No valid message (try next strategy)
500fn handle_commit_extraction_result(
501    extraction_result: anyhow::Result<Option<CommitExtractionResult>>,
502    strategy: CommitRetryStrategy,
503    log_dir: &str,
504    runtime: &PipelineRuntime,
505    last_extraction: &mut Option<CommitExtractionResult>,
506    attempt_log: &mut CommitAttemptLog,
507) -> Option<anyhow::Result<CommitMessageResult>> {
508    let log_file = format!("{log_dir}/final.log");
509
510    match extraction_result {
511        Ok(Some(extraction)) => {
512            // XSD validation already passed - we have a valid commit message
513            runtime.logger.info(&format!(
514                "Successfully extracted commit message with {strategy}"
515            ));
516            let message = extraction.clone().into_message();
517            attempt_log.set_outcome(AttemptOutcome::Success(message.clone()));
518            *last_extraction = Some(extraction);
519            // Note: generated_prompts is collected in the context and returned later
520            Some(Ok(CommitMessageResult {
521                message,
522                success: true,
523                _log_path: log_file,
524                generated_prompts: std::collections::HashMap::new(),
525            }))
526        }
527        Ok(None) => {
528            runtime.logger.warn(&format!(
529                "No valid commit message extracted with {strategy}, will try next strategy"
530            ));
531            attempt_log.set_outcome(AttemptOutcome::ExtractionFailed(
532                "No valid commit message extracted".to_string(),
533            ));
534            None // Continue to next strategy
535        }
536        Err(e) => {
537            runtime.logger.error(&format!(
538                "Failed to extract commit message with {strategy}: {e}"
539            ));
540            attempt_log.set_outcome(AttemptOutcome::ExtractionFailed(e.to_string()));
541            None // Continue to next strategy
542        }
543    }
544}
545
546/// Build the list of agents to try for commit generation.
547///
548/// This helper function constructs the ordered list of agents to try,
549/// starting with the primary agent and followed by configured fallbacks.
550fn build_agents_to_try<'a>(fallbacks: &'a [&'a str], primary_agent: &'a str) -> Vec<&'a str> {
551    let mut agents_to_try: Vec<&'a str> = vec![primary_agent];
552    for fb in fallbacks {
553        if *fb != primary_agent && !agents_to_try.contains(fb) {
554            agents_to_try.push(fb);
555        }
556    }
557    agents_to_try
558}
559
560/// Context for a commit attempt, bundling related state to avoid too many arguments.
561struct CommitAttemptContext<'a> {
562    /// The diff being processed
563    working_diff: &'a str,
564    /// Log directory path
565    log_dir: &'a str,
566    /// Whether the diff was pre-truncated
567    diff_was_truncated: bool,
568    /// Template context for user template overrides
569    template_context: &'a crate::prompts::TemplateContext,
570    /// Workspace for file operations (trait object for DI)
571    workspace: &'a dyn crate::workspace::Workspace,
572    /// Prompt history for checkpoint/resume determinism
573    prompt_history: &'a HashMap<String, String>,
574    /// Unique key for this commit generation attempt
575    prompt_key: String,
576    /// Output map to capture prompts that were newly generated (not replayed)
577    /// This is used for checkpoint/resume determinism
578    generated_prompts: &'a mut std::collections::HashMap<String, String>,
579}
580
581/// Run a single commit attempt with the given strategy and agent.
582///
583/// This function runs a single agent (not using fallback) to allow for
584/// per-agent prompt variant cycling with in-session XSD validation retry.
585/// Returns Some(result) if we should return early (success or hard error),
586/// or None if we should continue to the next strategy.
587fn run_commit_attempt_with_agent(
588    strategy: CommitRetryStrategy,
589    ctx: &mut CommitAttemptContext<'_>,
590    runtime: &mut PipelineRuntime,
591    registry: &AgentRegistry,
592    agent: &str,
593    last_extraction: &mut Option<CommitExtractionResult>,
594    session: &mut CommitLogSession,
595) -> Option<anyhow::Result<CommitMessageResult>> {
596    // Get the agent config
597    let Some(agent_config) = registry.resolve_config(agent) else {
598        runtime
599            .logger
600            .warn(&format!("Agent '{agent}' not found in registry, skipping"));
601        let mut attempt_log = session.new_attempt(agent, strategy.description());
602        attempt_log.set_outcome(AttemptOutcome::ExtractionFailed(format!(
603            "Agent '{agent}' not found in registry"
604        )));
605        let _ = attempt_log.write_to_workspace(session.run_dir(), ctx.workspace);
606        return None;
607    };
608
609    // Build the command for this agent
610    let cmd_str = agent_config.build_cmd(true, true, false);
611    let logfile = format!("{}/{}_latest.log", ctx.log_dir, agent.replace('/', "-"));
612
613    // In-session retry loop with XSD validation feedback
614    let max_retries = strategy.max_session_retries();
615    let mut xsd_error: Option<String> = None;
616
617    for retry_num in 0..max_retries {
618        // Before each retry, check if the XML file is writable and clean up if locked
619        if retry_num > 0 {
620            use crate::files::io::check_and_cleanup_xml_before_retry_with_workspace;
621            use std::path::Path;
622            let xml_path =
623                Path::new(crate::files::llm_output_extraction::xml_paths::COMMIT_MESSAGE_XML);
624            let _ = check_and_cleanup_xml_before_retry_with_workspace(
625                ctx.workspace,
626                xml_path,
627                runtime.logger,
628            );
629        }
630
631        // For initial attempt, xsd_error is None
632        // For retries, we use the XSD error to guide the agent
633        // Build prompt key for this attempt (strategy-specific)
634        let prompt_key = format!("{}_{}", ctx.prompt_key, strategy.stage_number());
635        let (prompt, was_replayed) = generate_prompt_for_strategy(
636            strategy,
637            ctx.working_diff,
638            ctx.template_context,
639            ctx.workspace,
640            xsd_error.as_deref(),
641            ctx.prompt_history,
642            &prompt_key,
643        );
644        let prompt_size_kb = prompt.len() / 1024;
645
646        // Log if using stored prompt for determinism
647        if was_replayed && retry_num == 0 {
648            runtime.logger.info(&format!(
649                "Using stored prompt from checkpoint for determinism: {}",
650                prompt_key
651            ));
652        } else if !was_replayed {
653            // Capture the newly generated prompt for checkpoint/resume
654            ctx.generated_prompts
655                .insert(prompt_key.clone(), prompt.clone());
656        }
657
658        // Create attempt log
659        let mut attempt_log = session.new_attempt(agent, strategy.description());
660        attempt_log.set_prompt_size(prompt.len());
661        attempt_log.set_diff_info(ctx.working_diff.len(), ctx.diff_was_truncated);
662
663        // Log retry attempt if not first attempt
664        if retry_num > 0 {
665            runtime.logger.info(&format!(
666                "  In-session retry {}/{} for XSD validation",
667                retry_num,
668                max_retries - 1
669            ));
670            if let Some(ref error) = xsd_error {
671                runtime.logger.info(&format!("  XSD error: {}", error));
672            }
673        } else {
674            log_commit_attempt(strategy, prompt_size_kb, agent, runtime);
675        }
676
677        // Run the agent directly (without fallback)
678        let exit_code = match crate::pipeline::run_with_prompt(
679            &crate::pipeline::PromptCommand {
680                label: &format!("generate commit message ({})", strategy.description()),
681                display_name: agent,
682                cmd_str: &cmd_str,
683                prompt: &prompt,
684                logfile: &logfile,
685                parser_type: agent_config.json_parser,
686                env_vars: &agent_config.env_vars,
687            },
688            runtime,
689        ) {
690            Ok(result) => result.exit_code,
691            Err(e) => {
692                runtime.logger.error(&format!("Failed to run agent: {e}"));
693                attempt_log.set_outcome(AttemptOutcome::ExtractionFailed(format!(
694                    "Agent execution failed: {e}"
695                )));
696                let _ = attempt_log.write_to_workspace(session.run_dir(), ctx.workspace);
697                return None;
698            }
699        };
700
701        if exit_code != 0 {
702            runtime
703                .logger
704                .warn("Commit agent failed, checking logs for partial output...");
705        }
706
707        let extraction_result = extract_commit_message_from_logs_with_trace(
708            ctx.log_dir,
709            ctx.working_diff,
710            agent,
711            runtime.logger,
712            &mut attempt_log,
713            ctx.workspace,
714        );
715
716        // Check if we got a valid commit message or need to retry for XSD errors
717        match &extraction_result {
718            Ok(Some(_)) => {
719                // XSD validation passed - we have a valid commit message
720                let result = handle_commit_extraction_result(
721                    extraction_result,
722                    strategy,
723                    ctx.log_dir,
724                    runtime,
725                    last_extraction,
726                    &mut attempt_log,
727                );
728
729                if let Err(e) = attempt_log.write_to_workspace(session.run_dir(), ctx.workspace) {
730                    runtime
731                        .logger
732                        .warn(&format!("Failed to write attempt log: {e}"));
733                }
734
735                return result;
736            }
737            _ => {
738                // Extraction failed - continue to check for XSD errors for retry
739            }
740        };
741
742        // Check extraction attempts for XSD validation errors
743        let xsd_error_msg = attempt_log
744            .extraction_attempts
745            .iter()
746            .find(|attempt| attempt.detail.contains("XSD validation failed"))
747            .map(|attempt| attempt.detail.clone());
748
749        if let Some(ref error_msg) = xsd_error_msg {
750            runtime
751                .logger
752                .warn(&format!("  XSD validation failed: {}", error_msg));
753
754            if retry_num < max_retries - 1 {
755                // Extract just the error message (after "XSD validation failed: ")
756                let error = error_msg
757                    .strip_prefix("XSD validation failed: ")
758                    .unwrap_or(error_msg);
759
760                // Store error for next retry attempt
761                xsd_error = Some(error.to_string());
762
763                // Write attempt log but don't return yet
764                attempt_log.set_outcome(AttemptOutcome::XsdValidationFailed(error.to_string()));
765                let _ = attempt_log.write_to_workspace(session.run_dir(), ctx.workspace);
766
767                // Continue to next retry iteration
768                continue;
769            } else {
770                // No more retries - fall through to handle as extraction failure
771                runtime
772                    .logger
773                    .warn("  No more in-session retries remaining");
774            }
775        }
776
777        // Handle extraction result (failure cases)
778        let result = handle_commit_extraction_result(
779            extraction_result,
780            strategy,
781            ctx.log_dir,
782            runtime,
783            last_extraction,
784            &mut attempt_log,
785        );
786
787        // Write the attempt log
788        if let Err(e) = attempt_log.write_to_workspace(session.run_dir(), ctx.workspace) {
789            runtime
790                .logger
791                .warn(&format!("Failed to write attempt log: {e}"));
792        }
793
794        // If we got a result (success or hard error), return it
795        if result.is_some() {
796            return result;
797        }
798
799        // Otherwise, if this was a retry and we exhausted retries, break out
800        if retry_num >= max_retries - 1 {
801            break;
802        }
803
804        // For non-XSD errors, we don't retry in-session - move to next strategy
805        break;
806    }
807
808    None
809}
810
811/// Return the hardcoded fallback commit message as last resort.
812fn return_hardcoded_fallback(
813    log_file: &str,
814    runtime: &PipelineRuntime,
815    generated_prompts: std::collections::HashMap<String, String>,
816) -> CommitMessageResult {
817    runtime.logger.warn("");
818    runtime.logger.warn("All recovery methods failed:");
819    runtime.logger.warn("  - All 9 prompt variants exhausted");
820    runtime
821        .logger
822        .warn("  - All agents in fallback chain exhausted");
823    runtime.logger.warn("  - All truncation stages failed");
824    runtime.logger.warn("  - Emergency prompts failed");
825    runtime.logger.warn("");
826    runtime
827        .logger
828        .warn("Using hardcoded fallback commit message as last resort.");
829    runtime.logger.warn(&format!(
830        "Fallback message: \"{HARDCODED_FALLBACK_COMMIT}\""
831    ));
832    runtime.logger.warn("");
833
834    CommitMessageResult {
835        message: HARDCODED_FALLBACK_COMMIT.to_string(),
836        success: true,
837        _log_path: log_file.to_string(),
838        generated_prompts,
839    }
840}
841
842/// Generate a commit message using the standard agent pipeline with fallback.
843///
844/// This function uses the standard agent pipeline with fallback, which provides:
845/// - Proper stdout/stderr logging
846/// - Configurable fallback chains
847/// - Retry logic with exponential backoff
848/// - Agent error classification
849///
850/// Multi-stage retry logic:
851/// 1. Try initial prompt
852/// 2. On fallback/empty result, try strict JSON prompt
853/// 3. On failure, try V2 strict prompt (with negative examples)
854/// 4. On failure, try ultra-minimal prompt
855/// 5. On failure, try emergency prompt
856/// 6. Only use hardcoded fallback after all prompt variants exhausted
857///
858/// # Agent Cycling Behavior
859///
860/// This function implements proper strategy-first cycling by trying each strategy
861/// with all agents before moving to the next strategy:
862/// - Strategy 1 (initial): Agent 1 → Agent 2 → Agent 3
863/// - Strategy 2 (strict JSON): Agent 1 → Agent 2 → Agent 3
864/// - Strategy 3 (strict JSON V2): Agent 1 → Agent 2 → Agent 3
865/// - etc.
866///
867/// This approach is more efficient because if a particular strategy works well
868/// with any agent, we succeed quickly rather than exhausting all strategies
869/// on the first agent before trying others.
870///
871/// # Arguments
872///
873/// * `diff` - The git diff to generate a commit message for
874/// * `registry` - The agent registry for resolving agents and fallbacks
875/// * `runtime` - The pipeline runtime for execution services
876/// * `commit_agent` - The primary agent to use for commit generation
877/// * `template_context` - Template context for user template overrides
878/// * `workspace` - Workspace filesystem for file operations
879/// * `prompt_history` - Prompt history for checkpoint/resume determinism
880///
881/// # Returns
882///
883/// Returns `Ok(CommitMessageResult)` with the generated message and metadata.
884pub fn generate_commit_message(
885    diff: &str,
886    registry: &AgentRegistry,
887    runtime: &mut PipelineRuntime,
888    commit_agent: &str,
889    template_context: &crate::prompts::TemplateContext,
890    workspace: &dyn crate::workspace::Workspace,
891    prompt_history: &HashMap<String, String>,
892) -> anyhow::Result<CommitMessageResult> {
893    let log_dir = ".agent/logs/commit_generation";
894    let log_file = format!("{log_dir}/final.log");
895
896    workspace.create_dir_all(Path::new(log_dir))?;
897
898    // Check if diff is empty before proceeding
899    if diff.trim().is_empty() {
900        runtime
901            .logger
902            .warn("Empty diff provided to generate_commit_message, using fallback");
903        return Ok(CommitMessageResult {
904            message: HARDCODED_FALLBACK_COMMIT.to_string(),
905            success: true,
906            _log_path: log_file,
907            generated_prompts: std::collections::HashMap::new(),
908        });
909    }
910
911    runtime.logger.info("Generating commit message...");
912
913    // Create a logging session for this commit generation run
914    let mut session = create_commit_log_session(log_dir, runtime, workspace);
915    let (working_diff, diff_was_pre_truncated) =
916        check_and_pre_truncate_diff(diff, commit_agent, runtime);
917
918    let fallbacks = registry.available_fallbacks(AgentRole::Commit);
919    let agents_to_try = build_agents_to_try(&fallbacks, commit_agent);
920
921    let mut last_extraction: Option<CommitExtractionResult> = None;
922    let mut total_attempts = 0;
923
924    // Generate a unique prompt key for this commit generation attempt
925    // Use timestamp-based key to ensure uniqueness across different commit generations
926    let prompt_key = format!(
927        "commit_{}",
928        std::time::SystemTime::now()
929            .duration_since(std::time::UNIX_EPOCH)
930            .unwrap_or_default()
931            .as_secs()
932    );
933
934    // Map to capture newly generated prompts for checkpoint/resume
935    let mut generated_prompts = std::collections::HashMap::new();
936
937    let mut attempt_ctx = CommitAttemptContext {
938        working_diff: &working_diff,
939        log_dir,
940        diff_was_truncated: diff_was_pre_truncated,
941        template_context,
942        workspace,
943        prompt_history,
944        prompt_key,
945        generated_prompts: &mut generated_prompts,
946    };
947
948    // Try each agent with all prompt variants
949    if let Some(result) = try_agents_with_strategies(
950        &agents_to_try,
951        &mut attempt_ctx,
952        runtime,
953        registry,
954        &mut last_extraction,
955        &mut session,
956        &mut total_attempts,
957    ) {
958        log_completion(runtime, &session, total_attempts, &result, workspace);
959        // Include generated prompts in the result
960        return result.map(|mut r| {
961            r.generated_prompts = generated_prompts;
962            r
963        });
964    }
965
966    // Handle fallback cases
967    let fallback_ctx = CommitFallbackContext {
968        log_file: &log_file,
969    };
970    handle_commit_fallbacks(
971        &fallback_ctx,
972        runtime,
973        &session,
974        total_attempts,
975        last_extraction.as_ref(),
976        generated_prompts,
977        workspace,
978    )
979}
980
981/// Create a commit log session, with fallback.
982fn create_commit_log_session(
983    log_dir: &str,
984    runtime: &mut PipelineRuntime,
985    workspace: &dyn crate::workspace::Workspace,
986) -> CommitLogSession {
987    match CommitLogSession::new(log_dir, workspace) {
988        Ok(s) => {
989            runtime.logger.info(&format!(
990                "Commit logs will be written to: {}",
991                s.run_dir().display()
992            ));
993            s
994        }
995        Err(e) => {
996            runtime
997                .logger
998                .warn(&format!("Failed to create log session: {e}"));
999            CommitLogSession::new(log_dir, workspace).unwrap_or_else(|_| {
1000                CommitLogSession::new("/tmp/ralph-commit-logs", workspace)
1001                    .expect("fallback session")
1002            })
1003        }
1004    }
1005}
1006
1007/// Try all agents with their strategy variants.
1008///
1009/// This function implements strategy-first cycling:
1010/// - Outer loop: Iterate through strategies
1011/// - Inner loop: Try all agents with the current strategy
1012/// - Only advance to next strategy if ALL agents failed with current strategy
1013///
1014/// This ensures each strategy gets the best chance to succeed with all
1015/// available agents before we try degraded fallback prompts.
1016fn try_agents_with_strategies(
1017    agents: &[&str],
1018    ctx: &mut CommitAttemptContext<'_>,
1019    runtime: &mut PipelineRuntime,
1020    registry: &AgentRegistry,
1021    last_extraction: &mut Option<CommitExtractionResult>,
1022    session: &mut CommitLogSession,
1023    total_attempts: &mut usize,
1024) -> Option<anyhow::Result<CommitMessageResult>> {
1025    let mut strategy = CommitRetryStrategy::Normal;
1026    loop {
1027        runtime.logger.info(&format!(
1028            "Trying strategy {}/{}: {}",
1029            strategy.stage_number(),
1030            CommitRetryStrategy::total_stages(),
1031            strategy.description()
1032        ));
1033
1034        for (agent_idx, agent) in agents.iter().enumerate() {
1035            runtime.logger.info(&format!(
1036                "  - Agent {}/{}: {agent}",
1037                agent_idx + 1,
1038                agents.len()
1039            ));
1040
1041            *total_attempts += 1;
1042            if let Some(result) = run_commit_attempt_with_agent(
1043                strategy,
1044                ctx,
1045                runtime,
1046                registry,
1047                agent,
1048                last_extraction,
1049                session,
1050            ) {
1051                return Some(result);
1052            }
1053        }
1054
1055        runtime.logger.warn(&format!(
1056            "All agents failed for strategy: {}",
1057            strategy.description()
1058        ));
1059
1060        match strategy.next() {
1061            Some(next) => strategy = next,
1062            None => break,
1063        }
1064    }
1065    None
1066}
1067
1068/// Log completion info and write session summary on success.
1069fn log_completion(
1070    runtime: &mut PipelineRuntime,
1071    session: &CommitLogSession,
1072    total_attempts: usize,
1073    result: &anyhow::Result<CommitMessageResult>,
1074    workspace: &dyn crate::workspace::Workspace,
1075) {
1076    if let Ok(ref commit_result) = result {
1077        let _ = session.write_summary(
1078            total_attempts,
1079            &format!(
1080                "SUCCESS: {}",
1081                preview_commit_message(&commit_result.message)
1082            ),
1083            workspace,
1084        );
1085    }
1086    runtime.logger.info(&format!(
1087        "Commit generation complete after {total_attempts} attempts. Logs: {}",
1088        session.run_dir().display()
1089    ));
1090}
1091
1092/// Context for commit fallback handling.
1093struct CommitFallbackContext<'a> {
1094    log_file: &'a str,
1095}
1096
1097/// Handle fallback cases after all agents exhausted.
1098///
1099/// With XSD validation handling everything, the fallback logic is simple:
1100/// - If we have a last extraction with a valid message, use it
1101/// - Otherwise, use the hardcoded fallback
1102fn handle_commit_fallbacks(
1103    ctx: &CommitFallbackContext<'_>,
1104    runtime: &mut PipelineRuntime,
1105    session: &CommitLogSession,
1106    total_attempts: usize,
1107    last_extraction: Option<&CommitExtractionResult>,
1108    generated_prompts: std::collections::HashMap<String, String>,
1109    workspace: &dyn crate::workspace::Workspace,
1110) -> anyhow::Result<CommitMessageResult> {
1111    // Use message from last extraction if available
1112    // (XSD validation already passed if we have an extraction)
1113    if let Some(extraction) = last_extraction {
1114        let message = extraction.clone().into_message();
1115        let _ = session.write_summary(
1116            total_attempts,
1117            &format!("LAST_EXTRACTION: {}", preview_commit_message(&message)),
1118            workspace,
1119        );
1120        runtime.logger.info(&format!(
1121            "Commit generation complete after {total_attempts} attempts. Logs: {}",
1122            session.run_dir().display()
1123        ));
1124        return Ok(CommitMessageResult {
1125            message,
1126            success: true,
1127            _log_path: ctx.log_file.to_string(),
1128            generated_prompts,
1129        });
1130    }
1131
1132    // Hardcoded fallback as last resort
1133    let _ = session.write_summary(
1134        total_attempts,
1135        &format!("HARDCODED_FALLBACK: {HARDCODED_FALLBACK_COMMIT}"),
1136        workspace,
1137    );
1138    runtime.logger.info(&format!(
1139        "Commit generation complete after {total_attempts} attempts (hardcoded fallback). Logs: {}",
1140        session.run_dir().display()
1141    ));
1142    Ok(return_hardcoded_fallback(
1143        ctx.log_file,
1144        runtime,
1145        generated_prompts,
1146    ))
1147}
1148
1149/// Create a commit with an automatically generated message using the standard pipeline.
1150///
1151/// This is a replacement for `commit_with_auto_message_fallback_result` in `git_helpers`
1152/// that uses the standard agent pipeline with proper logging and fallback support.
1153///
1154/// # Arguments
1155///
1156/// * `diff` - The git diff to generate a commit message from
1157/// * `commit_agent` - The primary agent to use for commit generation
1158/// * `git_user_name` - Optional git user name
1159/// * `git_user_email` - Optional git user email
1160/// * `ctx` - The phase context containing registry, logger, colors, config, and timer
1161///
1162/// # Returns
1163///
1164/// Returns `CommitResultFallback` indicating success, no changes, or failure.
1165pub fn commit_with_generated_message(
1166    diff: &str,
1167    commit_agent: &str,
1168    git_user_name: Option<&str>,
1169    git_user_email: Option<&str>,
1170    ctx: &mut PhaseContext<'_>,
1171) -> CommitResultFallback {
1172    // Stage all changes first
1173    let staged = match git_add_all() {
1174        Ok(s) => s,
1175        Err(e) => {
1176            return CommitResultFallback::Failed(format!("Failed to stage changes: {e}"));
1177        }
1178    };
1179
1180    if !staged {
1181        return CommitResultFallback::NoChanges;
1182    }
1183
1184    // Track execution start for commit generation
1185    let start_time = std::time::Instant::now();
1186
1187    // Set up the runtime
1188    let mut runtime = PipelineRuntime {
1189        timer: ctx.timer,
1190        logger: ctx.logger,
1191        colors: ctx.colors,
1192        config: ctx.config,
1193        executor: ctx.executor,
1194        executor_arc: std::sync::Arc::clone(&ctx.executor_arc),
1195        workspace: ctx.workspace,
1196    };
1197
1198    // Generate commit message using the standard pipeline
1199    let result = match generate_commit_message(
1200        diff,
1201        ctx.registry,
1202        &mut runtime,
1203        commit_agent,
1204        ctx.template_context,
1205        ctx.workspace,
1206        &ctx.prompt_history,
1207    ) {
1208        Ok(r) => r,
1209        Err(e) => {
1210            // Record failed generation in execution history
1211            ctx.execution_history.add_step(
1212                ExecutionStep::new(
1213                    "commit",
1214                    0,
1215                    "commit_generation",
1216                    StepOutcome::failure(format!("Failed to generate commit message: {e}"), false),
1217                )
1218                .with_agent(commit_agent)
1219                .with_duration(start_time.elapsed().as_secs()),
1220            );
1221            return CommitResultFallback::Failed(format!("Failed to generate commit message: {e}"));
1222        }
1223    };
1224
1225    // Capture generated prompts for checkpoint/resume
1226    for (key, prompt) in result.generated_prompts {
1227        ctx.capture_prompt(&key, &prompt);
1228    }
1229
1230    // Check if generation succeeded
1231    if !result.success || result.message.trim().is_empty() {
1232        // This should never happen after our fixes, but add defensive fallback
1233        ctx.logger
1234            .warn("Commit generation returned empty message, using hardcoded fallback...");
1235        let fallback_message = HARDCODED_FALLBACK_COMMIT.to_string();
1236        let commit_result = match git_commit(
1237            &fallback_message,
1238            git_user_name,
1239            git_user_email,
1240            Some(ctx.executor),
1241        ) {
1242            Ok(Some(oid)) => CommitResultFallback::Success(oid),
1243            Ok(None) => CommitResultFallback::NoChanges,
1244            Err(e) => CommitResultFallback::Failed(format!("Failed to create commit: {e}")),
1245        };
1246        // Record completion with fallback in execution history
1247        let outcome = match &commit_result {
1248            CommitResultFallback::Success(oid) => StepOutcome::success(
1249                Some(format!("Commit created: {oid}")),
1250                vec![".".to_string()],
1251            ),
1252            CommitResultFallback::NoChanges => {
1253                StepOutcome::skipped("No changes to commit".to_string())
1254            }
1255            CommitResultFallback::Failed(e) => StepOutcome::failure(e.clone(), false),
1256        };
1257        ctx.execution_history.add_step(
1258            ExecutionStep::new("commit", 0, "commit_generation", outcome)
1259                .with_agent(commit_agent)
1260                .with_duration(start_time.elapsed().as_secs()),
1261        );
1262        commit_result
1263    } else {
1264        // Create the commit with the generated message
1265        let commit_result = match git_commit(
1266            &result.message,
1267            git_user_name,
1268            git_user_email,
1269            Some(ctx.executor),
1270        ) {
1271            Ok(Some(oid)) => CommitResultFallback::Success(oid),
1272            Ok(None) => CommitResultFallback::NoChanges,
1273            Err(e) => CommitResultFallback::Failed(format!("Failed to create commit: {e}")),
1274        };
1275        // Record completion in execution history
1276        let outcome = match &commit_result {
1277            CommitResultFallback::Success(oid) => StepOutcome::success(
1278                Some(format!("Commit created: {oid}")),
1279                vec![".".to_string()],
1280            ),
1281            CommitResultFallback::NoChanges => {
1282                StepOutcome::skipped("No changes to commit".to_string())
1283            }
1284            CommitResultFallback::Failed(e) => StepOutcome::failure(e.clone(), false),
1285        };
1286        let oid_for_history = match &commit_result {
1287            CommitResultFallback::Success(oid) => Some(oid.to_string()),
1288            _ => None,
1289        };
1290        let mut step = ExecutionStep::new("commit", 0, "commit_generation", outcome)
1291            .with_agent(commit_agent)
1292            .with_duration(start_time.elapsed().as_secs());
1293        if let Some(oid) = &oid_for_history {
1294            step = step.with_git_commit_oid(oid);
1295        }
1296        ctx.execution_history.add_step(step);
1297        commit_result
1298    }
1299}
1300
1301/// Import types needed for parsing trace helpers.
1302use crate::phases::commit_logging::{ParsingTraceLog, ParsingTraceStep};
1303
1304/// Write parsing trace log to file with error handling.
1305fn write_parsing_trace_with_logging(
1306    parsing_trace: &ParsingTraceLog,
1307    log_dir: &str,
1308    logger: &Logger,
1309    workspace: &dyn crate::workspace::Workspace,
1310) {
1311    if let Err(e) = parsing_trace.write_to_workspace(Path::new(log_dir), workspace) {
1312        logger.warn(&format!("Failed to write parsing trace log: {e}"));
1313    }
1314}
1315
1316/// Try XML extraction and record in parsing trace.
1317/// Returns `Some(result)` if extraction succeeded (XSD validation passed), `None` otherwise.
1318fn try_xml_extraction_traced(
1319    content: &str,
1320    step_number: &mut usize,
1321    parsing_trace: &mut ParsingTraceLog,
1322    logger: &Logger,
1323    attempt_log: &mut CommitAttemptLog,
1324    log_dir: &str,
1325    workspace: &dyn crate::workspace::Workspace,
1326) -> Option<CommitExtractionResult> {
1327    // Try file-based extraction first - allows agents to write XML to .agent/tmp/commit_message.xml
1328    let xml_file_path = Path::new(xml_paths::COMMIT_MESSAGE_XML);
1329    let (xml_result, xml_detail) =
1330        if let Some(file_xml) = try_extract_from_file_with_workspace(workspace, xml_file_path) {
1331            // Found XML in file - validate it
1332            let (validated, detail) = try_extract_xml_commit_with_trace(&file_xml);
1333            let detail = format!("file-based: {}", detail);
1334            (validated, detail)
1335        } else {
1336            // Fall back to log content extraction
1337            try_extract_xml_commit_with_trace(content)
1338        };
1339    logger.info(&format!("  ✓ XML extraction: {xml_detail}"));
1340
1341    parsing_trace.add_step(
1342        ParsingTraceStep::new(*step_number, "XML Extraction")
1343            .with_input(&content[..content.len().min(1000)])
1344            .with_result(xml_result.as_deref().unwrap_or("[No XML found]"))
1345            .with_success(xml_result.is_some())
1346            .with_details(&xml_detail),
1347    );
1348    *step_number += 1;
1349
1350    if let Some(message) = xml_result {
1351        // XSD validation already passed inside try_extract_xml_commit_with_trace
1352        // Archive the XML file now that it's been successfully processed
1353        archive_xml_file_with_workspace(workspace, xml_file_path);
1354
1355        attempt_log.add_extraction_attempt(ExtractionAttempt::success("XML", xml_detail));
1356        parsing_trace.set_final_message(&message);
1357        write_parsing_trace_with_logging(parsing_trace, log_dir, logger, workspace);
1358        return Some(CommitExtractionResult::new(message));
1359    }
1360
1361    // XML extraction or XSD validation failed - file stays in place for agent to edit
1362    attempt_log.add_extraction_attempt(ExtractionAttempt::failure("XML", xml_detail));
1363    logger.info("  ✗ XML extraction failed");
1364    None
1365}
1366
1367/// Extract a commit message from agent logs with full tracing for diagnostics.
1368///
1369/// Records all extraction attempts in the provided `CommitAttemptLog` for debugging.
1370///
1371/// This function also creates and writes a `ParsingTraceLog` that captures
1372/// detailed information about each extraction step, including the exact
1373/// content being processed and XSD validation results.
1374fn extract_commit_message_from_logs_with_trace(
1375    log_dir: &str,
1376    _diff: &str,
1377    _agent_cmd: &str,
1378    logger: &Logger,
1379    attempt_log: &mut CommitAttemptLog,
1380    workspace: &dyn crate::workspace::Workspace,
1381) -> anyhow::Result<Option<CommitExtractionResult>> {
1382    // Create parsing trace log
1383    let mut parsing_trace = ParsingTraceLog::new(
1384        attempt_log.attempt_number,
1385        &attempt_log.agent,
1386        &attempt_log.strategy,
1387    );
1388
1389    // Read and preprocess log content using workspace abstraction
1390    let Some(content) = read_log_content_with_trace(log_dir, logger, attempt_log, workspace)?
1391    else {
1392        return Ok(None);
1393    };
1394
1395    // Set raw output in parsing trace
1396    parsing_trace.set_raw_output(&content);
1397
1398    let mut step_number = 1;
1399
1400    // XML-only extraction with XSD validation
1401    // The XML extraction includes flexible parsing with 4 strategies and XSD validation
1402    // If XSD validation fails, the error is returned for in-session retry
1403    if let Some(result) = try_xml_extraction_traced(
1404        &content,
1405        &mut step_number,
1406        &mut parsing_trace,
1407        logger,
1408        attempt_log,
1409        log_dir,
1410        workspace,
1411    ) {
1412        return Ok(Some(result));
1413    }
1414
1415    // XML extraction failed - add final failure step to parsing trace
1416    parsing_trace.add_step(
1417        ParsingTraceStep::new(step_number, "XML Extraction Failed")
1418            .with_input(&content[..content.len().min(1000)])
1419            .with_success(false)
1420            .with_details("No valid XML found or XSD validation failed"),
1421    );
1422
1423    write_parsing_trace_with_logging(&parsing_trace, log_dir, logger, workspace);
1424
1425    // Return None to trigger next strategy/agent fallback
1426    // The in-session retry loop will have already attempted XSD validation retries
1427    // if the error was an XSD validation failure (detected in attempt_log)
1428    Ok(None)
1429}
1430
1431/// Read and preprocess log content for extraction.
1432fn read_log_content_with_trace(
1433    log_dir: &str,
1434    logger: &Logger,
1435    attempt_log: &mut CommitAttemptLog,
1436    workspace: &dyn crate::workspace::Workspace,
1437) -> anyhow::Result<Option<String>> {
1438    let log_dir_path = Path::new(log_dir);
1439    let log_path = find_most_recent_log_in_workspace(log_dir_path, workspace)?;
1440    let Some(log_file) = log_path else {
1441        logger.warn("No log files found in commit generation directory");
1442        attempt_log.add_extraction_attempt(ExtractionAttempt::failure(
1443            "File",
1444            "No log files found".to_string(),
1445        ));
1446        return Ok(None);
1447    };
1448
1449    logger.info(&format!(
1450        "Reading commit message from log: {}",
1451        log_file.display()
1452    ));
1453
1454    let content = workspace.read(&log_file)?;
1455    attempt_log.set_raw_output(&content);
1456
1457    if content.trim().is_empty() {
1458        logger.warn("Log file is empty");
1459        attempt_log.add_extraction_attempt(ExtractionAttempt::failure(
1460            "File",
1461            "Log file is empty".to_string(),
1462        ));
1463        return Ok(None);
1464    }
1465
1466    // Apply preprocessing
1467    Ok(Some(preprocess_raw_content(&content)))
1468}
1469
1470/// Find the most recent log file in a directory using workspace abstraction.
1471///
1472/// This function uses the workspace trait for filesystem operations, making it
1473/// testable with `MemoryWorkspace` and ensuring proper architecture conformance.
1474///
1475/// # Arguments
1476///
1477/// * `log_dir` - The directory to search for log files
1478/// * `workspace` - The workspace to use for filesystem operations
1479///
1480/// # Returns
1481///
1482/// * `Ok(Some(path))` - Path to the most recent log file
1483/// * `Ok(None)` - No log files found
1484/// * `Err(e)` - Error reading directory
1485fn find_most_recent_log_in_workspace(
1486    log_dir: &Path,
1487    workspace: &dyn crate::workspace::Workspace,
1488) -> anyhow::Result<Option<std::path::PathBuf>> {
1489    // Use the pipeline::logfile module's workspace-based implementation
1490    // That module already has the proper workspace-based find_most_recent_logfile function
1491    // We need to adapt from directory search to prefix-based search
1492
1493    // If directory doesn't exist in workspace, return None
1494    if !workspace.exists(log_dir) {
1495        return Ok(None);
1496    }
1497
1498    let entries = match workspace.read_dir(log_dir) {
1499        Ok(e) => e,
1500        Err(_) => return Ok(None),
1501    };
1502
1503    let mut most_recent: Option<(std::path::PathBuf, std::time::SystemTime)> = None;
1504
1505    for entry in entries {
1506        if !entry.is_file() {
1507            continue;
1508        }
1509
1510        // Only look at .log files
1511        if let Some(filename) = entry.file_name().and_then(|s| s.to_str()) {
1512            if !filename.ends_with(".log") {
1513                continue;
1514            }
1515        } else {
1516            continue;
1517        }
1518
1519        if let Some(modified) = entry.modified() {
1520            match &most_recent {
1521                None => {
1522                    most_recent = Some((entry.path().to_path_buf(), modified));
1523                }
1524                Some((_, prev_modified)) if modified > *prev_modified => {
1525                    most_recent = Some((entry.path().to_path_buf(), modified));
1526                }
1527                _ => {}
1528            }
1529        } else {
1530            // No modification time available, use this if we have no best yet
1531            if most_recent.is_none() {
1532                most_recent = Some((
1533                    entry.path().to_path_buf(),
1534                    std::time::SystemTime::UNIX_EPOCH,
1535                ));
1536            }
1537        }
1538    }
1539
1540    Ok(most_recent.map(|(path, _)| path))
1541}
1542
1543#[cfg(test)]
1544mod tests {
1545    use super::*;
1546    use crate::workspace::MemoryWorkspace;
1547    use std::path::Path;
1548    use std::time::{Duration, SystemTime};
1549
1550    /// Read log content using workspace abstraction (test helper).
1551    ///
1552    /// This function finds the most recent log file in the given directory and reads
1553    /// its content using the workspace trait, making it testable with `MemoryWorkspace`.
1554    fn read_log_content_with_workspace(
1555        log_dir: &Path,
1556        workspace: &dyn crate::workspace::Workspace,
1557    ) -> anyhow::Result<Option<String>> {
1558        let log_path = find_most_recent_log_in_workspace(log_dir, workspace)?;
1559        let Some(log_file) = log_path else {
1560            return Ok(None);
1561        };
1562
1563        let content = workspace.read(&log_file)?;
1564
1565        if content.trim().is_empty() {
1566            return Ok(None);
1567        }
1568
1569        Ok(Some(content))
1570    }
1571
1572    #[test]
1573    fn test_find_most_recent_log_uses_workspace() {
1574        // Create workspace with log files at different modification times
1575        let time1 = SystemTime::UNIX_EPOCH + Duration::from_secs(1000);
1576        let time2 = SystemTime::UNIX_EPOCH + Duration::from_secs(2000);
1577
1578        let workspace = MemoryWorkspace::new_test()
1579            .with_file_at_time(
1580                ".agent/logs/commit_generation/old_agent_0.log",
1581                "old content",
1582                time1,
1583            )
1584            .with_file_at_time(
1585                ".agent/logs/commit_generation/new_agent_0.log",
1586                "new content with XML",
1587                time2,
1588            );
1589
1590        // Test that find_most_recent_log works with workspace
1591        let log_dir = Path::new(".agent/logs/commit_generation");
1592        let result = find_most_recent_log_in_workspace(log_dir, &workspace);
1593        assert!(result.is_ok());
1594        let log_path = result.unwrap();
1595        assert!(log_path.is_some());
1596        // Should find the most recent log file
1597        let path = log_path.unwrap();
1598        assert!(path.to_string_lossy().contains("new_agent"));
1599    }
1600
1601    #[test]
1602    fn test_find_most_recent_log_empty_workspace() {
1603        let workspace = MemoryWorkspace::new_test();
1604
1605        let log_dir = Path::new(".agent/logs/commit_generation");
1606        let result = find_most_recent_log_in_workspace(log_dir, &workspace);
1607        assert!(result.is_ok());
1608        assert!(result.unwrap().is_none());
1609    }
1610
1611    #[test]
1612    fn test_read_log_content_uses_workspace() {
1613        // Create workspace with a log file
1614        let workspace = MemoryWorkspace::new_test().with_file(
1615            ".agent/logs/commit_generation/test_agent_0.log",
1616            "test log content for extraction",
1617        );
1618
1619        let log_dir = Path::new(".agent/logs/commit_generation");
1620
1621        // read_log_content_with_workspace should read from workspace, not std::fs
1622        let result = read_log_content_with_workspace(log_dir, &workspace);
1623        assert!(result.is_ok());
1624        let content = result.unwrap();
1625        assert!(content.is_some());
1626        assert_eq!(content.unwrap(), "test log content for extraction");
1627    }
1628
1629    #[test]
1630    fn test_read_log_content_empty_log() {
1631        let workspace = MemoryWorkspace::new_test()
1632            .with_file(".agent/logs/commit_generation/empty_agent_0.log", "");
1633
1634        let log_dir = Path::new(".agent/logs/commit_generation");
1635        let result = read_log_content_with_workspace(log_dir, &workspace);
1636        assert!(result.is_ok());
1637        // Empty log should return None
1638        assert!(result.unwrap().is_none());
1639    }
1640
1641    #[test]
1642    fn test_truncate_diff_if_large() {
1643        let large_diff = "a".repeat(100_000);
1644        let truncated = truncate_diff_if_large(&large_diff, 10_000);
1645
1646        // Should be truncated
1647        assert!(truncated.len() < large_diff.len());
1648    }
1649
1650    #[test]
1651    fn test_truncate_preserves_small_diffs() {
1652        let small_diff = "a".repeat(100);
1653        let truncated = truncate_diff_if_large(&small_diff, 10_000);
1654
1655        // Should not be modified
1656        assert_eq!(truncated, small_diff);
1657    }
1658
1659    #[test]
1660    fn test_truncate_exactly_at_limit() {
1661        let diff = "a".repeat(10_000);
1662        let truncated = truncate_diff_if_large(&diff, 10_000);
1663
1664        // Should not be modified when at exact limit
1665        assert_eq!(truncated, diff);
1666    }
1667
1668    #[test]
1669    fn test_truncate_preserves_file_boundaries() {
1670        let diff = "diff --git a/file1.rs b/file1.rs\n\
1671            +line1\n\
1672            +line2\n\
1673            diff --git a/file2.rs b/file2.rs\n\
1674            +line3\n\
1675            +line4\n";
1676        let large_diff = format!("{}{}", diff, "x".repeat(100_000));
1677        let truncated = truncate_diff_if_large(&large_diff, 50);
1678
1679        // Should preserve complete file blocks
1680        assert!(truncated.contains("diff --git"));
1681        // Should contain truncation summary
1682        assert!(truncated.contains("Diff truncated"));
1683    }
1684
1685    #[test]
1686    fn test_prioritize_file_path() {
1687        // Source files get highest priority
1688        assert!(prioritize_file_path("src/main.rs") > prioritize_file_path("tests/test.rs"));
1689        assert!(prioritize_file_path("src/lib.rs") > prioritize_file_path("README.md"));
1690
1691        // Tests get lower priority than src
1692        assert!(prioritize_file_path("src/main.rs") > prioritize_file_path("test/test.rs"));
1693
1694        // Config files get medium priority
1695        assert!(prioritize_file_path("Cargo.toml") > prioritize_file_path("docs/guide.md"));
1696
1697        // Docs get lowest priority
1698        assert!(prioritize_file_path("README.md") < prioritize_file_path("src/main.rs"));
1699    }
1700
1701    #[test]
1702    fn test_truncate_keeps_high_priority_files() {
1703        let diff = "diff --git a/README.md b/README.md\n\
1704            +doc change\n\
1705            diff --git a/src/main.rs b/src/main.rs\n\
1706            +important change\n\
1707            diff --git a/tests/test.rs b/tests/test.rs\n\
1708            +test change\n";
1709
1710        // With a very small limit, should keep src/main.rs first
1711        let truncated = truncate_diff_if_large(diff, 80);
1712
1713        // Should include the high priority src file
1714        assert!(truncated.contains("src/main.rs"));
1715    }
1716
1717    #[test]
1718    fn test_truncate_lines_to_fit() {
1719        let lines = vec![
1720            "line1".to_string(),
1721            "line2".to_string(),
1722            "line3".to_string(),
1723            "line4".to_string(),
1724        ];
1725
1726        // Only fit first 3 lines (each 5 chars + newline = 6)
1727        let truncated = truncate_lines_to_fit(&lines, 18);
1728
1729        assert_eq!(truncated.len(), 3);
1730        // Last line should have truncation marker
1731        assert!(truncated[2].ends_with("[truncated...]"));
1732    }
1733
1734    #[test]
1735    fn test_hardcoded_fallback_commit() {
1736        // The hardcoded fallback should always be a valid conventional commit
1737        use crate::files::llm_output_extraction::is_conventional_commit_subject;
1738        assert!(
1739            is_conventional_commit_subject(HARDCODED_FALLBACK_COMMIT),
1740            "Hardcoded fallback must be a valid conventional commit"
1741        );
1742        assert!(!HARDCODED_FALLBACK_COMMIT.is_empty());
1743        assert!(HARDCODED_FALLBACK_COMMIT.len() >= 5);
1744    }
1745}
ralph_workflow/phases/commit.rs

ralph_workflow/phases/
commit.rs