Skip to main content

semver_analyzer_llm/
lib.rs

1//! LLM-based behavioral analysis for the semver-analyzer.
2//!
3//! This crate implements the `BehaviorAnalyzer` trait from `semver-analyzer-core`.
4//! It provides:
5//!
6//! 1. **Agent-agnostic LLM invocation** via `--llm-command` (goose, opencode, etc.)
7//! 2. **Template-constrained spec inference** — prompts that produce `FunctionSpec` JSON
8//! 3. **Tier 1 structural spec comparison** — mechanical comparison without LLM
9//! 4. **Tier 2 LLM fallback** — for ambiguous `notes` diffs and fuzzy matches
10//!
11//! ## Usage
12//!
13//! ```rust,ignore
14//! use semver_analyzer_llm::LlmBehaviorAnalyzer;
15//!
16//! let analyzer = LlmBehaviorAnalyzer::new("goose run --no-session -q -t");
17//! let spec = analyzer.infer_spec(&function_body, &signature)?;
18//! ```
19
20pub mod invoke;
21mod prompts;
22mod spec_compare;
23
24use anyhow::Result;
25pub use invoke::{
26    FileApiChange, FileBehavioralChange, LlmConstantRenamePattern, LlmInterfaceRenameMapping,
27    LlmSuffixRename,
28};
29use semver_analyzer_core::{
30    BehaviorAnalyzer, BreakingVerdict, ChangedFunction, FunctionSpec, LlmCategoryDefinition,
31    TestDiff,
32};
33
/// LLM-based implementation of `BehaviorAnalyzer`.
///
/// Spec inference is delegated to an external command (e.g., `goose run`,
/// `opencode run`). The command receives the prompt as its final argument
/// and is expected to write its response to stdout.
///
/// The type derives `Debug` (so it can appear in diagnostics and in
/// `#[derive(Debug)]` containers) and `Clone` (both fields are plain owned
/// data, so a configured analyzer can be duplicated cheaply).
#[derive(Debug, Clone)]
pub struct LlmBehaviorAnalyzer {
    /// The command template for invoking the LLM.
    /// The prompt is appended as the final argument.
    /// e.g., "goose run --no-session -q -t" or "opencode run"
    llm_command: String,

    /// Timeout in seconds for each LLM invocation.
    timeout_secs: u64,
}
48
49impl LlmBehaviorAnalyzer {
50    /// Create a new LLM analyzer with the given command.
51    pub fn new(llm_command: &str) -> Self {
52        Self {
53            llm_command: llm_command.to_string(),
54            timeout_secs: 120,
55        }
56    }
57
58    /// Set the timeout for LLM invocations.
59    pub fn with_timeout(mut self, timeout_secs: u64) -> Self {
60        self.timeout_secs = timeout_secs;
61        self
62    }
63
64    /// Run an LLM command with debug logging.
65    fn run_llm(&self, prompt: &str) -> Result<String> {
66        tracing::debug!(prompt_bytes = prompt.len(), "sending LLM prompt");
67        let result = invoke::run_llm_command(&self.llm_command, prompt, self.timeout_secs);
68        match &result {
69            Ok(response) => {
70                tracing::debug!(
71                    response_bytes = response.len(),
72                    response_tail = %&response[response.len().saturating_sub(200)..],
73                    "LLM response received"
74                );
75            }
76            Err(e) => {
77                tracing::debug!(%e, "LLM command failed");
78            }
79        }
80        result
81    }
82}
83
84impl LlmBehaviorAnalyzer {
85    /// Analyze a single file's diff for breaking changes (behavioral + API type-level).
86    ///
87    /// This is the file-level approach: one LLM call per file instead of
88    /// 2+ calls per function. The prompt includes the git diff and the
89    /// list of changed function signatures.
90    ///
91    /// The `categories` parameter provides language-specific behavioral change
92    /// categories for the LLM prompt. Pass `&lang.llm_categories()` from the
93    /// `Language` trait implementation.
94    ///
95    /// Returns (behavioral_changes, api_changes).
96    pub fn analyze_file_diff(
97        &self,
98        file_path: &str,
99        diff_content: &str,
100        changed_functions: &[ChangedFunction],
101        test_diff: Option<&str>,
102        categories: &[LlmCategoryDefinition],
103    ) -> Result<(Vec<FileBehavioralChange>, Vec<FileApiChange>)> {
104        let prompt = prompts::build_file_behavioral_prompt(
105            file_path,
106            diff_content,
107            changed_functions,
108            test_diff,
109            categories,
110        );
111        let response = self.run_llm(&prompt)?;
112        invoke::parse_file_behavioral_response(&response)
113    }
114
115    /// Infer constant rename patterns from sampled removed/added constant names.
116    pub fn infer_constant_renames(
117        &self,
118        removed_sample: &[&str],
119        added_sample: &[&str],
120        package_name: &str,
121        from_ref: &str,
122        to_ref: &str,
123    ) -> Result<Vec<LlmConstantRenamePattern>> {
124        let prompt = prompts::build_constant_rename_prompt(
125            removed_sample,
126            added_sample,
127            package_name,
128            from_ref,
129            to_ref,
130        );
131        let response = self.run_llm(&prompt)?;
132        invoke::parse_constant_rename_response(&response)
133    }
134
135    /// Run a pre-built prompt and parse the response as a component hierarchy.
136    ///
137    /// The prompt text is language-specific and should be provided by the
138    /// `Language` implementation. The LLM crate only handles execution and
139    /// JSON parsing of the response.
140    pub fn infer_hierarchy_from_prompt(
141        &self,
142        prompt: &str,
143    ) -> Result<std::collections::HashMap<String, Vec<semver_analyzer_core::ExpectedChild>>> {
144        let response = self.run_llm(prompt)?;
145        invoke::parse_hierarchy_response(&response)
146    }
147
148    /// Run a pre-built prompt and parse the response as suffix rename pairs.
149    ///
150    /// The prompt text is language-specific (e.g., CSS logical property
151    /// renames for TypeScript). The LLM crate only handles execution and
152    /// JSON parsing of the response.
153    pub fn infer_suffix_renames_from_prompt(
154        &self,
155        prompt: &str,
156    ) -> Result<Vec<invoke::LlmSuffixRename>> {
157        let response = self.run_llm(prompt)?;
158        invoke::parse_suffix_rename_response(&response)
159    }
160
161    /// Infer interface/component rename mappings from removed/added interface data.
162    pub fn infer_interface_renames(
163        &self,
164        removed: &[(&str, &[String])],
165        added: &[(&str, &[String])],
166        package_name: &str,
167        from_ref: &str,
168        to_ref: &str,
169    ) -> Result<Vec<LlmInterfaceRenameMapping>> {
170        let prompt =
171            prompts::build_interface_rename_prompt(removed, added, package_name, from_ref, to_ref);
172        let response = self.run_llm(&prompt)?;
173        invoke::parse_interface_rename_response(&response)
174    }
175}
176
177impl BehaviorAnalyzer for LlmBehaviorAnalyzer {
178    fn infer_spec(&self, function_body: &str, signature: &str) -> Result<FunctionSpec> {
179        let prompt = prompts::build_spec_inference_prompt(function_body, signature);
180        let response = self.run_llm(&prompt)?;
181        invoke::parse_function_spec(&response)
182    }
183
184    fn infer_spec_with_test_context(
185        &self,
186        function_body: &str,
187        signature: &str,
188        test_context: &TestDiff,
189    ) -> Result<FunctionSpec> {
190        let prompt =
191            prompts::build_spec_inference_with_test_prompt(function_body, signature, test_context);
192        let response = self.run_llm(&prompt)?;
193        invoke::parse_function_spec(&response)
194    }
195
196    fn specs_are_breaking(
197        &self,
198        old: &FunctionSpec,
199        new: &FunctionSpec,
200    ) -> Result<BreakingVerdict> {
201        // Tier 1: Structural comparison (no LLM)
202        let tier1 = spec_compare::structural_compare(old, new);
203
204        if tier1.is_breaking || tier1.confidence >= 0.80 {
205            return Ok(tier1);
206        }
207
208        // Tier 2: LLM fallback for notes diffs and ambiguous cases
209        if !old.notes.is_empty() || !new.notes.is_empty() {
210            let prompt = prompts::build_spec_comparison_prompt(old, new);
211            let response = self.run_llm(&prompt)?;
212            return invoke::parse_breaking_verdict(&response);
213        }
214
215        // No breaking changes detected
216        Ok(tier1)
217    }
218
219    fn check_propagation(
220        &self,
221        caller_body: &str,
222        caller_signature: &str,
223        callee_name: &str,
224        evidence_description: &str,
225    ) -> Result<bool> {
226        let prompt = prompts::build_propagation_check_prompt(
227            caller_body,
228            caller_signature,
229            callee_name,
230            evidence_description,
231        );
232        let response = self.run_llm(&prompt)?;
233        invoke::parse_propagation_result(&response)
234    }
235}