Skip to main content

bock_build/
repair.rs

1//! Codegen feedback loop (§17.7).
2//!
3//! The pipeline:
4//!
5//! 1. Compile already-generated target code via [`ToolchainRegistry`].
6//! 2. On failure — if a provider is configured and strictness allows —
7//!    call [`AiProvider::repair`] with the compiler error + AIR node.
8//! 3. Gate the repair on confidence (pinned cache replay bypasses, as
9//!    in D.5). Accepted repairs are retried against the compiler; if
10//!    they succeed, emit a build-scope [`Decision`].
11//! 4. Extract any [`CandidateRule`] on the repair response into the
12//!    local [`RuleCache`]. A second build can then hit the rule first
13//!    and skip the AI round-trip entirely.
14//! 5. Cap retries ([`RepairConfig::max_attempts`]; default 2). After
15//!    the cap the last compiler error is returned.
16//!
17//! This module does **not** drive initial generation — D.5's
18//! `AiSynthesisDriver` already does that. It owns the *post-generation*
19//! feedback loop: take already-produced code + the AIR node it came
20//! from, and reconcile it with the target compiler.
21
22use std::path::{Path, PathBuf};
23use std::sync::{Arc, Mutex};
24
25use bock_ai::{
26    compute_key, node_kind_name, AiProvider, CandidateRule, Decision, DecisionType,
27    ManifestError, ManifestWriter, RepairRequest, Rule, RuleCache, TargetProfile,
28};
29use bock_air::AIRNode;
30use bock_types::Strictness;
31use chrono::Utc;
32
33use crate::toolchain::{CompilationResult, ToolchainError, ToolchainRegistry};
34
35// ─── Configuration ───────────────────────────────────────────────────────────
36
37/// Runtime knobs for a single module's repair pass.
38#[derive(Debug, Clone)]
39pub struct RepairConfig {
40    /// Maximum repair retries per node before giving up (default 2).
41    ///
42    /// Caps infinite loops where repair keeps producing non-compiling
43    /// code; the final compiler error is returned when exceeded.
44    pub max_attempts: usize,
45    /// Minimum confidence to accept a repair (§17.4 / §17.8).
46    pub confidence_threshold: f64,
47    /// Strictness for the current compilation. Gates rule auto-apply
48    /// vs. pinned-only per §17.7.
49    pub strictness: Strictness,
50    /// Module path written into decision records.
51    pub module_path: PathBuf,
52}
53
54impl Default for RepairConfig {
55    fn default() -> Self {
56        Self {
57            max_attempts: 2,
58            confidence_threshold: 0.75,
59            strictness: Strictness::Development,
60            module_path: PathBuf::new(),
61        }
62    }
63}
64
65// ─── Outcome ─────────────────────────────────────────────────────────────────
66
/// What happened when one generated snippet went through the repair
/// pass.
#[derive(Clone, Debug, PartialEq)]
pub enum RepairOutcome {
    /// The original code compiled as-is, so no repair was attempted.
    FirstTrySuccess {
        /// The unmodified code that the compiler accepted.
        code: String,
    },
    /// A repair yielded compiling code after one or more iterations.
    Repaired {
        /// The repaired code that the compiler accepted.
        code: String,
        /// How many repair attempts were used (at least 1).
        attempts: usize,
        /// `true` when the repair response carried a candidate rule
        /// that was stored in the [`RuleCache`].
        rule_added: bool,
    },
    /// The confidence gate rejected the latest repair and the loop
    /// stopped without trying again.
    RejectedLowConfidence {
        /// The confidence the provider reported.
        confidence: f64,
        /// The most recent compiler error that could not be repaired.
        compiler_error: String,
    },
    /// All permitted attempts were used and the code still did not
    /// compile.
    Exhausted {
        /// How many repair attempts were made.
        attempts: usize,
        /// The most recent compiler error.
        compiler_error: String,
    },
    /// Repair was skipped because no provider is configured.
    NoProvider {
        /// The compiler error from the original code.
        compiler_error: String,
    },
    /// Production strictness did not allow a provider call.
    ProductionBlocked {
        /// The compiler error from the original code.
        compiler_error: String,
    },
    /// The provider itself failed.
    ProviderError {
        /// Human-readable description of the provider failure.
        message: String,
    },
}
117
118impl RepairOutcome {
119    /// Returns the working code, if the outcome has one.
120    #[must_use]
121    pub fn accepted_code(&self) -> Option<&str> {
122        match self {
123            Self::FirstTrySuccess { code } | Self::Repaired { code, .. } => Some(code),
124            _ => None,
125        }
126    }
127
128    /// `true` if the repair loop produced working code (first-try or
129    /// after at least one repair).
130    #[must_use]
131    pub fn is_success(&self) -> bool {
132        matches!(self, Self::FirstTrySuccess { .. } | Self::Repaired { .. })
133    }
134}
135
136// ─── Errors ──────────────────────────────────────────────────────────────────
137
138/// Fatal errors that prevent the repair pipeline from making a decision.
139///
140/// These are distinct from [`RepairOutcome`] variants: an outcome like
141/// `Exhausted` means the pipeline ran normally but compilation kept
142/// failing, whereas these errors mean the pipeline itself couldn't run
143/// (e.g., the manifest couldn't be flushed).
144#[derive(Debug, thiserror::Error)]
145pub enum RepairError {
146    /// Manifest write failed.
147    #[error("manifest error: {0}")]
148    Manifest(#[from] ManifestError),
149    /// Rule cache write failed.
150    #[error("rule cache error: {0}")]
151    Rules(#[from] bock_ai::RuleCacheError),
152    /// I/O error writing or reading the candidate code for toolchain
153    /// invocation.
154    #[error("I/O error during repair: {0}")]
155    Io(#[from] std::io::Error),
156    /// An unexpected toolchain error that isn't a compilation failure —
157    /// e.g., a missing toolchain binary.
158    #[error("toolchain error: {0}")]
159    Toolchain(#[from] ToolchainError),
160}
161
162// ─── Pipeline ────────────────────────────────────────────────────────────────
163
164/// Wires together the toolchain, AI provider, rule cache, and manifest
165/// writer into a single repair pass per AIR node.
166///
167/// The pipeline is cheap to construct and safe to share across threads;
168/// the provider and manifest writer are behind `Arc`.
169pub struct RepairPipeline {
170    provider: Option<Arc<dyn AiProvider>>,
171    rules: Option<RuleCache>,
172    manifest: Option<Arc<Mutex<ManifestWriter>>>,
173    toolchain: Arc<ToolchainRegistry>,
174    config: RepairConfig,
175}
176
177impl RepairPipeline {
178    /// Constructs a pipeline with no AI provider. Every failing compile
179    /// ends as [`RepairOutcome::NoProvider`]; the toolchain still runs.
180    #[must_use]
181    pub fn without_provider(toolchain: Arc<ToolchainRegistry>, config: RepairConfig) -> Self {
182        Self {
183            provider: None,
184            rules: None,
185            manifest: None,
186            toolchain,
187            config,
188        }
189    }
190
191    /// Constructs a fully wired pipeline.
192    #[must_use]
193    pub fn new(
194        provider: Arc<dyn AiProvider>,
195        rules: Option<RuleCache>,
196        manifest: Option<Arc<Mutex<ManifestWriter>>>,
197        toolchain: Arc<ToolchainRegistry>,
198        config: RepairConfig,
199    ) -> Self {
200        Self {
201            provider: Some(provider),
202            rules,
203            manifest,
204            toolchain,
205            config,
206        }
207    }
208
209    /// Borrow the active config.
210    #[must_use]
211    pub fn config(&self) -> &RepairConfig {
212        &self.config
213    }
214
215    /// Run the repair loop for one generated snippet.
216    ///
217    /// Writes `code` (and each repaired candidate) to `source_path` so
218    /// the target toolchain can read it. Returns the outcome; the
219    /// caller decides whether to fail the build or keep going.
220    ///
221    /// # Errors
222    /// Returns [`RepairError`] for filesystem failures writing the
223    /// candidate source, missing toolchains (anything other than
224    /// `InvocationFailed`), manifest write failures, and rule cache
225    /// write failures.
226    pub async fn run(
227        &self,
228        target: &TargetProfile,
229        node: &AIRNode,
230        initial_code: String,
231        source_path: &Path,
232    ) -> Result<RepairOutcome, RepairError> {
233        let target_id = target.id.clone();
234
235        // First compile attempt against the generator's code.
236        write_candidate(source_path, &initial_code)?;
237        match self.toolchain.invoke(&target_id, source_path, false) {
238            Ok(_) => return Ok(RepairOutcome::FirstTrySuccess { code: initial_code }),
239            Err(ToolchainError::InvocationFailed { .. }) => { /* fall through to repair */ }
240            Err(other) => return Err(other.into()),
241        }
242
243        // Read first error.
244        let mut compiler_error =
245            invocation_error(&self.toolchain.invoke(&target_id, source_path, false));
246
247        // No provider → no repair path.
248        let Some(provider) = self.provider.clone() else {
249            return Ok(RepairOutcome::NoProvider { compiler_error });
250        };
251
252        // Production strictness forbids unpinned AI repair calls. The
253        // guard mirrors D.5's `ProductionUnpinned` logic: at production
254        // the only acceptable path is a pre-pinned rule, which the
255        // caller was expected to try first via `RuleCache::lookup`.
256        if matches!(self.config.strictness, Strictness::Production) {
257            return Ok(RepairOutcome::ProductionBlocked { compiler_error });
258        }
259
260        let mut current_code = initial_code;
261        let mut attempts: usize = 0;
262        let mut rule_added = false;
263
264        while attempts < self.config.max_attempts {
265            attempts += 1;
266
267            let request = RepairRequest {
268                original_code: current_code.clone(),
269                compiler_error: compiler_error.clone(),
270                node: node.clone(),
271                target: target.clone(),
272            };
273            let response = match provider.repair(&request).await {
274                Ok(r) => r,
275                Err(e) => {
276                    return Ok(RepairOutcome::ProviderError {
277                        message: format!("{e}"),
278                    });
279                }
280            };
281
282            if response.confidence < self.config.confidence_threshold {
283                // Persist the compiler error we stopped on. Don't retry
284                // — the model has told us it isn't confident.
285                return Ok(RepairOutcome::RejectedLowConfidence {
286                    confidence: response.confidence,
287                    compiler_error,
288                });
289            }
290
291            // Try the repaired code.
292            write_candidate(source_path, &response.fixed_code)?;
293            match self.toolchain.invoke(&target_id, source_path, false) {
294                Ok(_) => {
295                    // Compile succeeded. Record the repair decision,
296                    // maybe persist the candidate rule, and return.
297                    self.record_repair(node, target, &response, &compiler_error)?;
298                    if let Some(candidate) = response.candidate_rule.as_ref() {
299                        if let Some(rules) = &self.rules {
300                            let rule = persist_rule(
301                                rules,
302                                candidate,
303                                node,
304                                response.confidence,
305                            )?;
306                            rule_added = true;
307                            self.record_rule_applied(node, target, &rule)?;
308                        }
309                    }
310                    return Ok(RepairOutcome::Repaired {
311                        code: response.fixed_code,
312                        attempts,
313                        rule_added,
314                    });
315                }
316                Err(ToolchainError::InvocationFailed { .. }) => {
317                    // Roll forward: keep the attempted code and the new
318                    // error for the next iteration.
319                    current_code = response.fixed_code;
320                    compiler_error = invocation_error(
321                        &self.toolchain.invoke(&target_id, source_path, false),
322                    );
323                }
324                Err(other) => return Err(other.into()),
325            }
326        }
327
328        Ok(RepairOutcome::Exhausted {
329            attempts,
330            compiler_error,
331        })
332    }
333
334    fn record_repair(
335        &self,
336        node: &AIRNode,
337        target: &TargetProfile,
338        response: &bock_ai::RepairResponse,
339        original_error: &str,
340    ) -> Result<(), ManifestError> {
341        let Some(manifest) = &self.manifest else {
342            return Ok(());
343        };
344        let mut mw = manifest.lock().expect("manifest writer mutex poisoned");
345
346        let provider_id = self
347            .provider
348            .as_ref()
349            .map_or_else(|| "deterministic".into(), |p| p.model_id());
350        let id = decision_id("repair", node, target);
351        mw.record(Decision {
352            id,
353            module: self.config.module_path.clone(),
354            target: Some(target.id.clone()),
355            decision_type: DecisionType::Repair,
356            choice: response.fixed_code.clone(),
357            alternatives: Vec::new(),
358            reasoning: Some(format!(
359                "compiler error: {}; fixed by AI repair ({})",
360                summarize(original_error),
361                response
362                    .reasoning
363                    .as_deref()
364                    .unwrap_or("no reasoning supplied")
365            )),
366            model_id: provider_id,
367            confidence: response.confidence,
368            pinned: false,
369            pin_reason: None,
370            pinned_at: None,
371            pinned_by: None,
372            superseded_by: None,
373            timestamp: Utc::now(),
374        });
375        Ok(())
376    }
377
378    fn record_rule_applied(
379        &self,
380        node: &AIRNode,
381        target: &TargetProfile,
382        rule: &Rule,
383    ) -> Result<(), ManifestError> {
384        let Some(manifest) = &self.manifest else {
385            return Ok(());
386        };
387        let mut mw = manifest.lock().expect("manifest writer mutex poisoned");
388
389        let provider_id = self
390            .provider
391            .as_ref()
392            .map_or_else(|| "deterministic".into(), |p| p.model_id());
393        let id = decision_id(&format!("rule:{}", rule.id), node, target);
394        mw.record(Decision {
395            id,
396            module: self.config.module_path.clone(),
397            target: Some(target.id.clone()),
398            decision_type: DecisionType::RuleApplied,
399            choice: format!("rule {} matched pattern {}", rule.id, rule.node_kind),
400            alternatives: Vec::new(),
401            reasoning: Some(format!(
402                "candidate rule extracted from repair; future {} nodes may skip AI",
403                rule.node_kind
404            )),
405            model_id: provider_id,
406            confidence: rule.confidence,
407            pinned: rule.pinned,
408            pin_reason: rule.pinned.then(|| "manual".into()),
409            pinned_at: rule.pinned.then(Utc::now),
410            pinned_by: rule.pinned.then(|| "rule-author".into()),
411            superseded_by: None,
412            timestamp: Utc::now(),
413        });
414        Ok(())
415    }
416}
417
418// ─── Rule-cache-first pre-AI hook ────────────────────────────────────────────
419
420/// Outcome of the rule-cache pre-AI hook in D.6 step 3.
421#[derive(Debug, Clone, PartialEq)]
422pub enum RuleLookupOutcome {
423    /// A cached rule matched and was applied; no AI call needed.
424    Applied {
425        /// The rule that matched.
426        rule: Rule,
427        /// The code produced by applying the template.
428        code: String,
429    },
430    /// No rule matched for this node kind; the caller should fall
431    /// through to Tier 1 AI synthesis.
432    Miss,
433    /// Production strictness required a pinned rule and none was
434    /// found; the caller decides how to handle.
435    MissNeedsPin,
436}
437
438/// Apply the rule cache *before* an AI call.
439///
440/// Loads rules for `target_id`, filters by `production_only_pinned`,
441/// and returns the highest-priority match or a miss. On a hit, the
442/// caller records the [`DecisionType::RuleApplied`] decision so
443/// reviewers can see that the rule covered this node without AI.
444///
445/// # Errors
446/// Returns [`bock_ai::RuleCacheError`] on I/O or parse failure.
447pub fn try_apply_rule(
448    rules: &RuleCache,
449    target_id: &str,
450    node: &AIRNode,
451    strictness: Strictness,
452) -> Result<RuleLookupOutcome, bock_ai::RuleCacheError> {
453    let production_only = matches!(strictness, Strictness::Production);
454    let Some(rule) = rules.lookup(target_id, node, production_only)? else {
455        return Ok(if production_only {
456            RuleLookupOutcome::MissNeedsPin
457        } else {
458            RuleLookupOutcome::Miss
459        });
460    };
461    let code = apply_template(&rule.template, node);
462    Ok(RuleLookupOutcome::Applied { rule, code })
463}
464
465/// v1 template application: return the template verbatim.
466///
467/// The rule format is TBD per §17.7; a real interpolation engine
468/// (substituting `{{ scrutinee }}`, `{{ arms }}`, etc. from the node's
469/// children) is out of scope for D.6. Callers should treat the
470/// returned string as the rule's generated code for the current node.
471#[must_use]
472pub fn apply_template(template: &str, _node: &AIRNode) -> String {
473    template.to_string()
474}
475
476// ─── Helpers ─────────────────────────────────────────────────────────────────
477
/// Writes `code` to `source_path`, first creating the parent directory
/// chain when the path has a parent component.
fn write_candidate(source_path: &Path, code: &str) -> std::io::Result<()> {
    source_path
        .parent()
        .map_or(Ok(()), |dir| std::fs::create_dir_all(dir))
        .and_then(|()| std::fs::write(source_path, code))
}
484
485fn invocation_error(result: &Result<CompilationResult, ToolchainError>) -> String {
486    match result {
487        Ok(_) => "compilation unexpectedly succeeded".into(),
488        Err(ToolchainError::InvocationFailed {
489            stdout,
490            stderr,
491            exit_code,
492            ..
493        }) => {
494            let diag = if stderr.is_empty() { stdout } else { stderr };
495            format!(
496                "exit {}: {}",
497                exit_code
498                    .map(|c| c.to_string())
499                    .unwrap_or_else(|| "signal".into()),
500                summarize(diag)
501            )
502        }
503        Err(e) => format!("{e}"),
504    }
505}
506
/// Produces a bounded excerpt of `error` for decision records.
///
/// Surrounding whitespace is trimmed. Text of at most 512 bytes is
/// returned as-is; longer text is cut to at most 512 bytes and
/// suffixed with `...`. The cut backs off to the nearest UTF-8
/// character boundary, so diagnostics containing multi-byte characters
/// never cause a slicing panic.
fn summarize(error: &str) -> String {
    const MAX_BYTES: usize = 512;
    const ELLIPSIS: &str = "...";

    let trimmed = error.trim();
    if trimmed.len() <= MAX_BYTES {
        return trimmed.to_owned();
    }

    // Byte offset 512 may land inside a multi-byte character. Walk back
    // until it sits on a boundary (offset 0 always is one).
    let mut cut = MAX_BYTES;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }

    let mut excerpt = String::with_capacity(cut + ELLIPSIS.len());
    excerpt.push_str(&trimmed[..cut]);
    excerpt.push_str(ELLIPSIS);
    excerpt
}
519
520fn persist_rule(
521    rules: &RuleCache,
522    candidate: &CandidateRule,
523    node: &AIRNode,
524    confidence: f64,
525) -> Result<Rule, bock_ai::RuleCacheError> {
526    let kind = node_kind_name(&node.kind);
527    let rule = Rule::from_candidate(candidate, kind, confidence);
528    rules.insert(&rule)?;
529    Ok(rule)
530}
531
532fn decision_id(prefix: &str, node: &AIRNode, target: &TargetProfile) -> String {
533    #[derive(serde::Serialize)]
534    struct Keyed<'a> {
535        prefix: &'a str,
536        target: &'a str,
537        node_debug: String,
538    }
539    let keyed = Keyed {
540        prefix,
541        target: &target.id,
542        node_debug: format!("{node:?}"),
543    };
544    compute_key(&keyed).unwrap_or_else(|_| format!("{prefix}-{}", node.id))
545}
546
#[cfg(test)]
mod tests {
    use super::*;
    use bock_air::{NodeIdGen, NodeKind};
    use bock_errors::Span;

    /// Builds an empty `Block` node to stand in for real AIR input.
    fn dummy_node() -> AIRNode {
        let gen = NodeIdGen::new();
        AIRNode::new(
            gen.next(),
            Span::dummy(),
            NodeKind::Block {
                stmts: Vec::new(),
                tail: None,
            },
        )
    }

    /// Builds the "empty block" rule for the `js` target that the
    /// rule-lookup tests insert into their caches.
    fn empty_block_rule() -> Rule {
        let candidate = CandidateRule {
            target_id: "js".into(),
            pattern: "empty block".into(),
            template: "() => {}".into(),
            priority: 1,
        };
        Rule::from_candidate(&candidate, "Block", 0.9)
    }

    #[test]
    fn accepted_code_reports_working_outcome() {
        let ok = RepairOutcome::FirstTrySuccess { code: "x".into() };
        assert_eq!(ok.accepted_code(), Some("x"));
        assert!(ok.is_success());

        let rep = RepairOutcome::Repaired {
            code: "y".into(),
            attempts: 1,
            rule_added: false,
        };
        assert_eq!(rep.accepted_code(), Some("y"));
        assert!(rep.is_success());

        let bad = RepairOutcome::NoProvider {
            compiler_error: "boom".into(),
        };
        assert_eq!(bad.accepted_code(), None);
        assert!(!bad.is_success());
    }

    #[test]
    fn summarize_truncates_long_errors() {
        let long = "x".repeat(1000);
        let out = summarize(&long);
        assert!(out.len() <= 515);
        assert!(out.ends_with("..."));
    }

    #[test]
    fn summarize_short_errors_unchanged() {
        let out = summarize("  short error  ");
        assert_eq!(out, "short error");
    }

    #[test]
    fn apply_template_returns_template_verbatim() {
        let code = apply_template("switch(x){}", &dummy_node());
        assert_eq!(code, "switch(x){}");
    }

    #[test]
    fn try_apply_rule_misses_with_empty_cache() {
        let dir = tempfile::tempdir().unwrap();
        let rules = RuleCache::new(dir.path());
        let outcome = try_apply_rule(&rules, "js", &dummy_node(), Strictness::Development).unwrap();
        assert_eq!(outcome, RuleLookupOutcome::Miss);
    }

    #[test]
    fn try_apply_rule_hits_matching_kind() {
        let dir = tempfile::tempdir().unwrap();
        let rules = RuleCache::new(dir.path());
        rules.insert(&empty_block_rule()).unwrap();

        let outcome = try_apply_rule(&rules, "js", &dummy_node(), Strictness::Sketch).unwrap();
        match outcome {
            RuleLookupOutcome::Applied { rule: r, code } => {
                assert_eq!(r.node_kind, "Block");
                assert_eq!(code, "() => {}");
            }
            other => panic!("expected Applied, got {other:?}"),
        }
    }

    #[test]
    fn try_apply_rule_reports_miss_needs_pin_in_production() {
        let dir = tempfile::tempdir().unwrap();
        let rules = RuleCache::new(dir.path());
        // not pinned — production should skip it
        rules.insert(&empty_block_rule()).unwrap();

        let outcome = try_apply_rule(&rules, "js", &dummy_node(), Strictness::Production).unwrap();
        assert_eq!(outcome, RuleLookupOutcome::MissNeedsPin);
    }

    /// A toolchain whose binary cannot be located must surface as
    /// `RepairError::Toolchain` rather than being treated as a compile
    /// failure (which would end as `RepairOutcome::NoProvider`).
    #[test]
    fn pipeline_escalates_missing_toolchain() {
        // use a fake target whose toolchain will fail to locate the binary
        let mut registry = ToolchainRegistry::new();
        registry.register(crate::toolchain::ToolchainSpec {
            target_id: "fake".into(),
            display_name: "Fake".into(),
            binary_name: "not_a_real_binary_repair_xyz".into(),
            version_args: vec!["--version".into()],
            compile_command: "not_a_real_binary_repair_xyz".into(),
            compile_args: vec![],
            install_hint: "n/a".into(),
        });
        let toolchain = Arc::new(registry);
        let pipeline = RepairPipeline::without_provider(toolchain, RepairConfig::default());

        let dir = tempfile::tempdir().unwrap();
        let src = dir.path().join("out.js");
        let target = TargetProfile {
            id: "fake".into(),
            display_name: "Fake".into(),
            capabilities: Default::default(),
            conventions: Default::default(),
        };
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let result = rt.block_on(pipeline.run(&target, &dummy_node(), "x".into(), &src));
        assert!(
            matches!(result, Err(RepairError::Toolchain(_))),
            "expected toolchain escalation, got {result:?}"
        );
    }
}