aptu_core/ai/
provider.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! AI provider trait and shared implementations.
4//!
5//! Defines the `AiProvider` trait that all AI providers must implement,
6//! along with default implementations for shared logic like prompt building,
7//! request sending, and response parsing.
8
9use anyhow::{Context, Result};
10use async_trait::async_trait;
11use regex::Regex;
12use reqwest::Client;
13use secrecy::SecretString;
14use std::sync::LazyLock;
15use tracing::{debug, instrument};
16
17use super::AiResponse;
18use super::registry::PROVIDER_ANTHROPIC;
19use super::types::{
20    ChatCompletionRequest, ChatCompletionResponse, ChatMessage, IssueDetails, ResponseFormat,
21    TriageResponse,
22};
23use crate::history::AiStats;
24
25use super::prompts::{
26    build_create_system_prompt, build_pr_label_system_prompt, build_pr_review_system_prompt,
27    build_triage_system_prompt,
28};
29
30/// Maximum number of characters retained from an AI provider error response body.
31const MAX_ERROR_BODY_LENGTH: usize = 200;
32
33/// Redacts error body to prevent leaking sensitive API details.
34/// Truncates to [`MAX_ERROR_BODY_LENGTH`] characters and appends "[truncated]" if longer.
35fn redact_api_error_body(body: &str) -> String {
36    if body.chars().count() <= MAX_ERROR_BODY_LENGTH {
37        body.to_owned()
38    } else {
39        let truncated: String = body.chars().take(MAX_ERROR_BODY_LENGTH).collect();
40        format!("{truncated} [truncated]")
41    }
42}
43
44/// Parses JSON response from AI provider, detecting truncated responses.
45///
46/// If the JSON parsing fails with an EOF error (indicating the response was cut off),
47/// returns a `TruncatedResponse` error that can be retried. Other JSON errors are
48/// wrapped as `InvalidAIResponse`.
49///
50/// # Arguments
51///
52/// * `text` - The JSON text to parse
53/// * `provider` - The name of the AI provider (for error context)
54///
55/// # Returns
56///
57/// Parsed value of type T, or an error if parsing fails
58fn parse_ai_json<T: serde::de::DeserializeOwned>(text: &str, provider: &str) -> Result<T> {
59    match serde_json::from_str::<T>(text) {
60        Ok(value) => Ok(value),
61        Err(e) => {
62            // Check if this is an EOF error (truncated response)
63            if e.is_eof() {
64                Err(anyhow::anyhow!(
65                    crate::error::AptuError::TruncatedResponse {
66                        provider: provider.to_string(),
67                    }
68                ))
69            } else {
70                Err(anyhow::anyhow!(crate::error::AptuError::InvalidAIResponse(
71                    e
72                )))
73            }
74        }
75    }
76}
77
78/// Maximum length for issue body to stay within token limits.
79pub const MAX_BODY_LENGTH: usize = 4000;
80
81/// Maximum number of comments to include in the prompt.
82pub const MAX_COMMENTS: usize = 5;
83
84/// Maximum number of files to include in PR review prompt.
85pub const MAX_FILES: usize = 20;
86
87/// Maximum total diff size (in characters) for PR review prompt.
88pub const MAX_TOTAL_DIFF_SIZE: usize = 50_000;
89
90/// Maximum number of labels to include in the prompt.
91pub const MAX_LABELS: usize = 30;
92
93/// Maximum number of milestones to include in the prompt.
94pub const MAX_MILESTONES: usize = 10;
95
96/// Estimated overhead for XML tags, section headers, and schema preamble added by
97/// `build_pr_review_user_prompt`. Used to ensure the prompt budget accounts for
98/// non-content characters when estimating total prompt size.
99const PROMPT_OVERHEAD_CHARS: usize = 1_000;
100
101/// Preamble appended to every user-turn prompt to request a JSON response matching the schema.
102const SCHEMA_PREAMBLE: &str = "\n\nRespond with valid JSON matching this schema:\n";
103
104/// Matches structural XML delimiter tags (case-insensitive) used as prompt delimiters.
105/// These must be stripped from user-controlled fields to prevent prompt injection.
106///
107/// Covers: `pull_request`, `issue_content`, `issue_body`, `pr_diff`, `commit_message`, `pr_comment`, `file_content`.
108///
109/// The pattern uses a simple alternation with no quantifiers, so `ReDoS` is not a concern:
110/// regex engine complexity is O(n) in the input length regardless of content.
111static XML_DELIMITERS: LazyLock<Regex> = LazyLock::new(|| {
112    Regex::new(
113        r"(?i)</?(?:pull_request|issue_content|issue_body|pr_diff|commit_message|pr_comment|file_content|dependency_release_notes)>",
114    )
115    .expect("valid regex")
116});
117
118/// Removes `<pull_request>` / `</pull_request>` and `<issue_content>` / `</issue_content>`
119/// XML delimiter tags from a user-supplied string, preventing prompt injection via XML tag
120/// smuggling.
121///
122/// Tags are removed entirely (replaced with empty string) rather than substituted with a
123/// placeholder. A visible placeholder such as `[sanitized]` could cause the LLM to reason
124/// about the substitution marker itself, which is unnecessary and potentially confusing.
125///
126/// Nested or malformed XML is not a concern: the only delimiters this code inserts into
127/// prompts are the exact strings `<pull_request>` / `</pull_request>` and
128/// `<issue_content>` / `</issue_content>` (no attributes, no nesting). Stripping those
129/// fixed forms is sufficient to prevent a user-supplied value from breaking out of the
130/// delimiter boundary.
131///
132/// Applied to all user-controlled fields inside prompt delimiter blocks:
133/// - Issue triage: `issue.title`, `issue.body`, comment author/body, related issue
134///   title/state, label name/description, milestone title/description.
135/// - PR review: `pr.title`, `pr.body`, `file.filename`, `file.status`, patch content.
136fn sanitize_prompt_field(s: &str) -> String {
137    XML_DELIMITERS.replace_all(s, "").into_owned()
138}
139
140/// AI provider trait for issue triage and creation.
141///
142/// Defines the interface that all AI providers must implement.
143/// Default implementations are provided for shared logic.
144#[async_trait]
145pub trait AiProvider: Send + Sync {
146    /// Returns the name of the provider (e.g., "gemini", "openrouter").
147    fn name(&self) -> &str;
148
149    /// Returns the API URL for this provider.
150    fn api_url(&self) -> &str;
151
152    /// Returns the environment variable name for the API key.
153    fn api_key_env(&self) -> &str;
154
155    /// Returns the HTTP client for making requests.
156    fn http_client(&self) -> &Client;
157
158    /// Returns the API key for authentication.
159    fn api_key(&self) -> &SecretString;
160
161    /// Returns the model name.
162    fn model(&self) -> &str;
163
164    /// Returns the maximum tokens for API responses.
165    fn max_tokens(&self) -> u32;
166
167    /// Returns the temperature for API requests.
168    fn temperature(&self) -> f32;
169
170    /// Returns whether this provider is Anthropic-compatible and supports
171    /// `cache_control` on message blocks.
172    ///
173    /// Default implementation checks `self.name() == "anthropic"`. Providers
174    /// that route through a different name but support Anthropic prompt caching
175    /// can override this method.
176    fn is_anthropic(&self) -> bool {
177        self.name() == PROVIDER_ANTHROPIC
178    }
179
180    /// Returns the maximum retry attempts for rate-limited requests.
181    ///
182    /// Default implementation returns 3. Providers can override
183    /// to use a different retry limit.
184    fn max_attempts(&self) -> u32 {
185        3
186    }
187
188    /// Returns the circuit breaker for this provider (optional).
189    ///
190    /// Default implementation returns None. Providers can override
191    /// to provide circuit breaker functionality.
192    fn circuit_breaker(&self) -> Option<&super::CircuitBreaker> {
193        None
194    }
195
196    /// Builds HTTP headers for API requests.
197    ///
198    /// Default implementation includes Authorization and Content-Type headers.
199    /// Providers can override to add custom headers.
200    fn build_headers(&self) -> reqwest::header::HeaderMap {
201        let mut headers = reqwest::header::HeaderMap::new();
202        if let Ok(val) = "application/json".parse() {
203            headers.insert("Content-Type", val);
204        }
205        headers
206    }
207
208    /// Validates the model configuration.
209    ///
210    /// Default implementation does nothing. Providers can override
211    /// to enforce constraints (e.g., free tier validation).
212    fn validate_model(&self) -> Result<()> {
213        Ok(())
214    }
215
216    /// Returns the custom guidance string for system prompt injection, if set.
217    ///
218    /// Default implementation returns `None`. Providers that store custom guidance
219    /// (e.g., from `AiConfig`) override this to supply it.
220    fn custom_guidance(&self) -> Option<&str> {
221        None
222    }
223
224    /// Sends a chat completion request to the provider's API (HTTP-only, no retry).
225    ///
226    /// Default implementation handles HTTP headers, error responses (401, 429).
227    /// Does not include retry logic - use `send_and_parse()` for retry behavior.
228    #[instrument(skip(self, request), fields(provider = self.name(), model = self.model()))]
229    async fn send_request_inner(
230        &self,
231        request: &ChatCompletionRequest,
232    ) -> Result<ChatCompletionResponse> {
233        use secrecy::ExposeSecret;
234        use tracing::warn;
235
236        use crate::error::AptuError;
237
238        let mut req = self.http_client().post(self.api_url());
239
240        // Add Authorization header (skip for Anthropic, which uses x-api-key)
241        if !self.is_anthropic() {
242            req = req.header(
243                "Authorization",
244                format!("Bearer {}", self.api_key().expose_secret()),
245            );
246        }
247
248        // Add custom headers from provider
249        for (key, value) in &self.build_headers() {
250            req = req.header(key.clone(), value.clone());
251        }
252
253        let response = req
254            .json(request)
255            .send()
256            .await
257            .context(format!("Failed to send request to {} API", self.name()))?;
258
259        // Check for HTTP errors
260        let status = response.status();
261        if !status.is_success() {
262            if status.as_u16() == 401 {
263                anyhow::bail!(
264                    "Invalid {} API key. Check your {} environment variable.",
265                    self.name(),
266                    self.api_key_env()
267                );
268            } else if status.as_u16() == 429 {
269                warn!("Rate limited by {} API", self.name());
270                // Parse Retry-After header (seconds), default to 0 if not present
271                let retry_after = response
272                    .headers()
273                    .get("Retry-After")
274                    .and_then(|h| h.to_str().ok())
275                    .and_then(|s| s.parse::<u64>().ok())
276                    .unwrap_or(0);
277                debug!(retry_after, "Parsed Retry-After header");
278                return Err(AptuError::RateLimited {
279                    provider: self.name().to_string(),
280                    retry_after,
281                }
282                .into());
283            }
284            let error_body = response.text().await.unwrap_or_default();
285            anyhow::bail!(
286                "{} API error (HTTP {}): {}",
287                self.name(),
288                status.as_u16(),
289                redact_api_error_body(&error_body)
290            );
291        }
292
293        // Parse response
294        let completion: ChatCompletionResponse = response
295            .json()
296            .await
297            .context(format!("Failed to parse {} API response", self.name()))?;
298
299        Ok(completion)
300    }
301
302    /// Sends a chat completion request and parses the response with retry logic.
303    ///
304    /// This method wraps both HTTP request and JSON parsing in a single retry loop,
305    /// allowing truncated responses to be retried. Includes circuit breaker handling.
306    ///
307    /// # Arguments
308    ///
309    /// * `request` - The chat completion request to send
310    ///
311    /// # Returns
312    ///
313    /// A tuple of (parsed response, stats) extracted from the API response
314    ///
315    /// # Errors
316    ///
317    /// Returns an error if:
318    /// - API request fails (network, timeout, rate limit)
319    /// - Response cannot be parsed as valid JSON (including truncated responses)
320    #[instrument(skip(self, request), fields(provider = self.name(), model = self.model()))]
321    async fn send_and_parse<T: serde::de::DeserializeOwned + Send>(
322        &self,
323        request: &ChatCompletionRequest,
324    ) -> Result<(T, AiStats, Vec<String>)> {
325        use tracing::{info, warn};
326
327        use crate::error::AptuError;
328        use crate::retry::{extract_retry_after, is_retryable_anyhow};
329
330        // Check circuit breaker before attempting request
331        if let Some(cb) = self.circuit_breaker()
332            && cb.is_open()
333        {
334            return Err(AptuError::CircuitOpen.into());
335        }
336
337        // Start timing (outside retry loop to measure total time including retries)
338        let start = std::time::Instant::now();
339
340        // Custom retry loop that respects retry_after from RateLimited errors
341        let mut attempt: u32 = 0;
342        let max_attempts: u32 = self.max_attempts();
343
344        // Helper function to avoid closure-in-expression clippy warning
345        #[allow(clippy::items_after_statements)]
346        async fn try_request<T: serde::de::DeserializeOwned>(
347            provider: &(impl AiProvider + ?Sized),
348            request: &ChatCompletionRequest,
349        ) -> Result<(T, ChatCompletionResponse)> {
350            // Send HTTP request
351            let completion = provider.send_request_inner(request).await?;
352
353            // Extract message content
354            let content = completion
355                .choices
356                .first()
357                .and_then(|c| {
358                    c.message
359                        .content
360                        .clone()
361                        .or_else(|| c.message.reasoning.clone())
362                })
363                .context("No response from AI model")?;
364
365            debug!(response_length = content.len(), "Received AI response");
366
367            // Parse JSON response (inside retry loop, so truncated responses are retried)
368            let parsed: T = parse_ai_json(&content, provider.name())?;
369
370            Ok((parsed, completion))
371        }
372
373        let (parsed, completion): (T, ChatCompletionResponse) = loop {
374            attempt += 1;
375
376            let result = try_request(self, request).await;
377
378            match result {
379                Ok(success) => break success,
380                Err(err) => {
381                    // Check if error is retryable
382                    if !is_retryable_anyhow(&err) || attempt >= max_attempts {
383                        return Err(err);
384                    }
385
386                    // Extract retry_after if present, otherwise use exponential backoff
387                    let delay = if let Some(retry_after_duration) = extract_retry_after(&err) {
388                        debug!(
389                            retry_after_secs = retry_after_duration.as_secs(),
390                            "Using Retry-After value from rate limit error"
391                        );
392                        retry_after_duration
393                    } else {
394                        // Use exponential backoff with jitter: 1s, 2s, 4s + 0-500ms
395                        let backoff_secs = 2_u64.pow(attempt.saturating_sub(1));
396                        let jitter_ms = fastrand::u64(0..500);
397                        std::time::Duration::from_millis(backoff_secs * 1000 + jitter_ms)
398                    };
399
400                    let error_msg = err.to_string();
401                    warn!(
402                        error = %error_msg,
403                        delay_secs = delay.as_secs(),
404                        attempt,
405                        max_attempts,
406                        "Retrying after error"
407                    );
408
409                    // Drop err before await to avoid holding non-Send value across await
410                    drop(err);
411                    tokio::time::sleep(delay).await;
412                }
413            }
414        };
415
416        // Record success in circuit breaker
417        if let Some(cb) = self.circuit_breaker() {
418            cb.record_success();
419        }
420
421        // Calculate duration (total time including any retries)
422        #[allow(clippy::cast_possible_truncation)]
423        let duration_ms = start.elapsed().as_millis() as u64;
424
425        // Build AI stats from usage info (trust API's cost field)
426        let (input_tokens, output_tokens, cost_usd, cache_read_tokens, cache_write_tokens) =
427            if let Some(usage) = completion.usage {
428                (
429                    usage.prompt_tokens,
430                    usage.completion_tokens,
431                    usage.cost,
432                    usage.cache_read_tokens,
433                    usage.cache_write_tokens,
434                )
435            } else {
436                // If no usage info, default to 0
437                debug!("No usage information in API response");
438                (0, 0, None, 0, 0)
439            };
440
441        let ai_stats = AiStats {
442            provider: self.name().to_string(),
443            model: self.model().to_string(),
444            input_tokens,
445            output_tokens,
446            duration_ms,
447            cost_usd,
448            fallback_provider: None,
449            prompt_chars: 0,
450            cache_read_tokens,
451            cache_write_tokens,
452            effective_token_units: 0.0,
453            trace_id: None,
454        }
455        .with_computed_etu();
456
457        // Extract finish_reasons from choices
458        let finish_reasons: Vec<String> = completion
459            .choices
460            .iter()
461            .filter_map(|c| c.finish_reason.clone())
462            .collect();
463
464        // Emit structured metrics
465        info!(
466            duration_ms,
467            input_tokens,
468            output_tokens,
469            cache_read_tokens,
470            cache_write_tokens,
471            cost_usd = ?cost_usd,
472            model = %self.model(),
473            "AI request completed"
474        );
475
476        // Log cache hit/miss details
477        debug!(
478            cache_read_tokens = %cache_read_tokens,
479            cache_write_tokens = %cache_write_tokens,
480            "Cache token usage"
481        );
482
483        Ok((parsed, ai_stats, finish_reasons))
484    }
485
486    /// Analyzes a GitHub issue using the provider's API.
487    ///
488    /// Returns a structured triage response with summary, labels, questions, duplicates, and usage stats.
489    ///
490    /// # Arguments
491    ///
492    /// * `issue` - Issue details to analyze
493    ///
494    /// # Errors
495    ///
496    /// Returns an error if:
497    /// - API request fails (network, timeout, rate limit)
498    /// - Response cannot be parsed as valid JSON
499    #[instrument(skip(self, issue), fields(issue_number = issue.number, repo = %format!("{}/{}", issue.owner, issue.repo)))]
500    async fn analyze_issue(&self, issue: &IssueDetails) -> Result<AiResponse> {
501        debug!(model = %self.model(), "Calling {} API", self.name());
502
503        // Build request
504        let system_content = if let Some(override_prompt) =
505            super::context::load_system_prompt_override("triage_system").await
506        {
507            override_prompt
508        } else {
509            Self::build_system_prompt(self.custom_guidance())
510        };
511
512        let mut messages = vec![
513            ChatMessage {
514                role: "system".to_string(),
515                content: Some(system_content),
516                reasoning: None,
517                cache_control: None,
518            },
519            ChatMessage {
520                role: "user".to_string(),
521                content: Some(Self::build_user_prompt(issue)),
522                reasoning: None,
523                cache_control: None,
524            },
525        ];
526
527        // Inject cache control on system message for Anthropic
528        if self.is_anthropic()
529            && let Some(msg) = messages.first_mut()
530        {
531            msg.cache_control = Some(super::types::CacheControl::ephemeral());
532        }
533
534        let request = ChatCompletionRequest {
535            model: self.model().to_string(),
536            messages,
537            response_format: Some(ResponseFormat {
538                format_type: "json_object".to_string(),
539                json_schema: None,
540            }),
541            max_tokens: Some(self.max_tokens()),
542            temperature: Some(self.temperature()),
543        };
544
545        // Send request and parse JSON with retry logic
546        let (triage, ai_stats, _finish_reasons) =
547            self.send_and_parse::<TriageResponse>(&request).await?;
548
549        debug!(
550            input_tokens = ai_stats.input_tokens,
551            output_tokens = ai_stats.output_tokens,
552            duration_ms = ai_stats.duration_ms,
553            cost_usd = ?ai_stats.cost_usd,
554            "AI analysis complete"
555        );
556
557        Ok(AiResponse {
558            triage,
559            stats: ai_stats,
560        })
561    }
562
563    /// Creates a formatted GitHub issue using the provider's API.
564    ///
565    /// Takes raw issue title and body, formats them using AI (conventional commit style,
566    /// structured body), and returns the formatted content with suggested labels.
567    ///
568    /// # Arguments
569    ///
570    /// * `title` - Raw issue title from user
571    /// * `body` - Raw issue body/description from user
572    /// * `repo` - Repository name for context (owner/repo format)
573    ///
574    /// # Errors
575    ///
576    /// Returns an error if:
577    /// - API request fails (network, timeout, rate limit)
578    /// - Response cannot be parsed as valid JSON
579    #[instrument(skip(self), fields(repo = %repo))]
580    async fn create_issue(
581        &self,
582        title: &str,
583        body: &str,
584        repo: &str,
585    ) -> Result<(super::types::CreateIssueResponse, AiStats)> {
586        debug!(model = %self.model(), "Calling {} API for issue creation", self.name());
587
588        // Build request
589        let system_content = if let Some(override_prompt) =
590            super::context::load_system_prompt_override("create_system").await
591        {
592            override_prompt
593        } else {
594            Self::build_create_system_prompt(self.custom_guidance())
595        };
596
597        let mut messages = vec![
598            ChatMessage {
599                role: "system".to_string(),
600                content: Some(system_content),
601                reasoning: None,
602                cache_control: None,
603            },
604            ChatMessage {
605                role: "user".to_string(),
606                content: Some(Self::build_create_user_prompt(title, body, repo)),
607                reasoning: None,
608                cache_control: None,
609            },
610        ];
611
612        // Inject cache control on system message for Anthropic
613        if self.is_anthropic()
614            && let Some(msg) = messages.first_mut()
615        {
616            msg.cache_control = Some(super::types::CacheControl::ephemeral());
617        }
618
619        let request = ChatCompletionRequest {
620            model: self.model().to_string(),
621            messages,
622            response_format: Some(ResponseFormat {
623                format_type: "json_object".to_string(),
624                json_schema: None,
625            }),
626            max_tokens: Some(self.max_tokens()),
627            temperature: Some(self.temperature()),
628        };
629
630        // Send request and parse JSON with retry logic
631        let (create_response, ai_stats, _finish_reasons) = self
632            .send_and_parse::<super::types::CreateIssueResponse>(&request)
633            .await?;
634
635        debug!(
636            title_len = create_response.formatted_title.len(),
637            body_len = create_response.formatted_body.len(),
638            labels = create_response.suggested_labels.len(),
639            input_tokens = ai_stats.input_tokens,
640            output_tokens = ai_stats.output_tokens,
641            duration_ms = ai_stats.duration_ms,
642            "Issue formatting complete with stats"
643        );
644
645        Ok((create_response, ai_stats))
646    }
647
648    /// Builds the system prompt for issue triage.
649    #[must_use]
650    fn build_system_prompt(custom_guidance: Option<&str>) -> String {
651        let context = super::context::load_custom_guidance(custom_guidance);
652        build_triage_system_prompt(&context)
653    }
654
655    /// Builds the user prompt containing the issue details.
656    #[must_use]
657    fn build_user_prompt(issue: &IssueDetails) -> String {
658        use std::fmt::Write;
659
660        let mut prompt = String::new();
661
662        prompt.push_str("<issue_content>\n");
663        let _ = writeln!(prompt, "Title: {}\n", sanitize_prompt_field(&issue.title));
664
665        // Sanitize body before truncation (injection tag could straddle the boundary)
666        let sanitized_body = sanitize_prompt_field(&issue.body);
667        let body = if sanitized_body.len() > MAX_BODY_LENGTH {
668            format!(
669                "{}...\n[APTU: body truncated by size budget -- do not speculate on missing content]",
670                &sanitized_body[..MAX_BODY_LENGTH],
671            )
672        } else if sanitized_body.is_empty() {
673            "[No description provided]".to_string()
674        } else {
675            sanitized_body
676        };
677        let _ = writeln!(prompt, "Body:\n{body}\n");
678
679        // Include existing labels
680        if !issue.labels.is_empty() {
681            let _ = writeln!(prompt, "Existing Labels: {}\n", issue.labels.join(", "));
682        }
683
684        // Include recent comments (limited)
685        if !issue.comments.is_empty() {
686            prompt.push_str("Recent Comments:\n");
687            for comment in issue.comments.iter().take(MAX_COMMENTS) {
688                let sanitized_comment_body = sanitize_prompt_field(&comment.body);
689                let comment_body = if sanitized_comment_body.len() > 500 {
690                    format!("{}...", &sanitized_comment_body[..500])
691                } else {
692                    sanitized_comment_body
693                };
694                let _ = writeln!(
695                    prompt,
696                    "- @{}: {}",
697                    sanitize_prompt_field(&comment.author),
698                    comment_body
699                );
700            }
701            prompt.push('\n');
702        }
703
704        // Include related issues from search (for context)
705        if !issue.repo_context.is_empty() {
706            prompt.push_str("Related Issues in Repository (for context):\n");
707            for related in issue.repo_context.iter().take(10) {
708                let _ = writeln!(
709                    prompt,
710                    "- #{} [{}] {}",
711                    related.number,
712                    sanitize_prompt_field(&related.state),
713                    sanitize_prompt_field(&related.title)
714                );
715            }
716            prompt.push('\n');
717        }
718
719        // Include repository structure (source files)
720        if !issue.repo_tree.is_empty() {
721            prompt.push_str("Repository Structure (source files):\n");
722            for path in issue.repo_tree.iter().take(20) {
723                let _ = writeln!(prompt, "- {path}");
724            }
725            prompt.push('\n');
726        }
727
728        // Include available labels
729        if !issue.available_labels.is_empty() {
730            prompt.push_str("Available Labels:\n");
731            for label in issue.available_labels.iter().take(MAX_LABELS) {
732                let description = if label.description.is_empty() {
733                    String::new()
734                } else {
735                    format!(" - {}", sanitize_prompt_field(&label.description))
736                };
737                let _ = writeln!(
738                    prompt,
739                    "- {} (color: #{}){}",
740                    sanitize_prompt_field(&label.name),
741                    label.color,
742                    description
743                );
744            }
745            prompt.push('\n');
746        }
747
748        // Include available milestones
749        if !issue.available_milestones.is_empty() {
750            prompt.push_str("Available Milestones:\n");
751            for milestone in issue.available_milestones.iter().take(MAX_MILESTONES) {
752                let description = if milestone.description.is_empty() {
753                    String::new()
754                } else {
755                    format!(" - {}", sanitize_prompt_field(&milestone.description))
756                };
757                let _ = writeln!(
758                    prompt,
759                    "- {}{}",
760                    sanitize_prompt_field(&milestone.title),
761                    description
762                );
763            }
764            prompt.push('\n');
765        }
766
767        prompt.push_str("</issue_content>");
768        prompt.push_str(SCHEMA_PREAMBLE);
769        prompt.push_str(crate::ai::prompts::TRIAGE_SCHEMA);
770
771        prompt
772    }
773
774    /// Builds the system prompt for issue creation/formatting.
775    #[must_use]
776    fn build_create_system_prompt(custom_guidance: Option<&str>) -> String {
777        let context = super::context::load_custom_guidance(custom_guidance);
778        build_create_system_prompt(&context)
779    }
780
781    /// Builds the user prompt for issue creation/formatting.
782    #[must_use]
783    fn build_create_user_prompt(title: &str, body: &str, _repo: &str) -> String {
784        let sanitized_title = sanitize_prompt_field(title);
785        let sanitized_body = sanitize_prompt_field(body);
786        format!(
787            "Please format this GitHub issue:\n\nTitle: {sanitized_title}\n\nBody:\n{sanitized_body}{}{}",
788            SCHEMA_PREAMBLE,
789            crate::ai::prompts::CREATE_SCHEMA
790        )
791    }
792
793    /// Estimates the initial size of a PR review prompt in characters.
794    ///
795    /// Sums title, body, file metadata, patches, `full_content`, `dep_enrichments`,
796    /// `ast_context`, `call_graph`, and overhead.
797    #[must_use]
798    fn estimate_pr_size(
799        pr: &super::types::PrDetails,
800        ast_context: &str,
801        call_graph: &str,
802    ) -> usize {
803        pr.title.len()
804            + pr.body.len()
805            + pr.files
806                .iter()
807                .map(|f| f.patch.as_ref().map_or(0, String::len))
808                .sum::<usize>()
809            + pr.files
810                .iter()
811                .map(|f| f.full_content.as_ref().map_or(0, String::len))
812                .sum::<usize>()
813            + pr.dep_enrichments
814                .iter()
815                .map(|d| d.body.len() + d.package_name.len() + d.github_url.len())
816                .sum::<usize>()
817            + ast_context.len()
818            + call_graph.len()
819            + PROMPT_OVERHEAD_CHARS
820    }
821
822    /// Reviews a pull request using the provider's API.
823    ///
824    /// Analyzes PR metadata and file diffs to provide structured review feedback.
825    ///
826    /// # Arguments
827    ///
828    /// * `pr` - Pull request details including files and diffs
829    ///
830    /// # Concurrency
831    ///
832    /// `ctx` is owned by each call; truncation counter mutations inside
833    /// `build_pr_review_user_prompt` are local to that invocation and are never
834    /// shared across concurrent calls.
835    ///
836    /// # Errors
837    ///
838    /// Returns an error if:
839    /// - API request fails (network, timeout, rate limit)
840    /// - Response cannot be parsed as valid JSON
841    #[instrument(skip(self, ctx), fields(pr_number = ctx.pr.number, repo = %format!("{}/{}", ctx.pr.owner, ctx.pr.repo)))]
842    async fn review_pr(
843        &self,
844        mut ctx: crate::ai::review_context::ReviewContext,
845        review_config: &crate::config::ReviewConfig,
846    ) -> Result<(super::types::PrReviewResponse, AiStats, Vec<String>)> {
847        debug!(model = %self.model(), "Calling {} API for PR review", self.name());
848
849        // Build request
850        let mut system_content = if let Some(override_prompt) =
851            super::context::load_system_prompt_override("pr_review_system").await
852        {
853            override_prompt
854        } else {
855            Self::build_pr_review_system_prompt(self.custom_guidance())
856        };
857
858        // Prepend repository instructions if available
859        if let Some(ref instructions) = ctx.pr.instructions {
860            // Escape XML delimiters to prevent tag injection
861            let escaped_instructions = instructions
862                .replace('&', "&amp;")
863                .replace('<', "&lt;")
864                .replace('>', "&gt;");
865            system_content = format!(
866                "<repo_instructions>\n{escaped_instructions}\n</repo_instructions>\n\n{system_content}"
867            );
868        }
869
870        // Assemble full prompt to measure actual size
871        let assembled_prompt = Self::build_pr_review_user_prompt(&mut ctx);
872        let actual_prompt_chars = assembled_prompt.len();
873        ctx.prompt_chars_final = actual_prompt_chars;
874
875        tracing::info!(
876            actual_prompt_chars,
877            max_chars = review_config.max_prompt_chars,
878            "PR review prompt assembled"
879        );
880
881        let mut messages = vec![
882            ChatMessage {
883                role: "system".to_string(),
884                content: Some(system_content),
885                reasoning: None,
886                cache_control: None,
887            },
888            ChatMessage {
889                role: "user".to_string(),
890                content: Some(assembled_prompt),
891                reasoning: None,
892                cache_control: None,
893            },
894        ];
895
896        // Inject cache control on system message for Anthropic
897        if self.is_anthropic()
898            && let Some(msg) = messages.first_mut()
899        {
900            msg.cache_control = Some(super::types::CacheControl::ephemeral());
901        }
902
903        let request = ChatCompletionRequest {
904            model: self.model().to_string(),
905            messages,
906            response_format: Some(ResponseFormat {
907                format_type: "json_object".to_string(),
908                json_schema: None,
909            }),
910            max_tokens: Some(self.max_tokens()),
911            temperature: Some(self.temperature()),
912        };
913
914        // Send request and parse JSON with retry logic
915        let (review, mut ai_stats, finish_reasons) = self
916            .send_and_parse::<super::types::PrReviewResponse>(&request)
917            .await?;
918
919        ai_stats.prompt_chars = actual_prompt_chars;
920
921        debug!(
922            verdict = %review.verdict,
923            input_tokens = ai_stats.input_tokens,
924            output_tokens = ai_stats.output_tokens,
925            duration_ms = ai_stats.duration_ms,
926            prompt_chars = ai_stats.prompt_chars,
927            "PR review complete with stats"
928        );
929
930        Ok((review, ai_stats, finish_reasons))
931    }
932
933    /// Suggests labels for a pull request using the provider's API.
934    ///
935    /// Analyzes PR title, body, and file paths to suggest relevant labels.
936    ///
937    /// # Arguments
938    ///
939    /// * `title` - Pull request title
940    /// * `body` - Pull request description
941    /// * `file_paths` - List of file paths changed in the PR
942    ///
943    /// # Errors
944    ///
945    /// Returns an error if:
946    /// - API request fails (network, timeout, rate limit)
947    /// - Response cannot be parsed as valid JSON
948    #[instrument(skip(self), fields(title = %title))]
949    async fn suggest_pr_labels(
950        &self,
951        title: &str,
952        body: &str,
953        file_paths: &[String],
954    ) -> Result<(Vec<String>, AiStats)> {
955        debug!(model = %self.model(), "Calling {} API for PR label suggestion", self.name());
956
957        // Build request
958        let system_content = if let Some(override_prompt) =
959            super::context::load_system_prompt_override("pr_label_system").await
960        {
961            override_prompt
962        } else {
963            Self::build_pr_label_system_prompt(self.custom_guidance())
964        };
965
966        let mut messages = vec![
967            ChatMessage {
968                role: "system".to_string(),
969                content: Some(system_content),
970                reasoning: None,
971                cache_control: None,
972            },
973            ChatMessage {
974                role: "user".to_string(),
975                content: Some(Self::build_pr_label_user_prompt(title, body, file_paths)),
976                reasoning: None,
977                cache_control: None,
978            },
979        ];
980
981        // Inject cache control on system message for Anthropic
982        if self.is_anthropic()
983            && let Some(msg) = messages.first_mut()
984        {
985            msg.cache_control = Some(super::types::CacheControl::ephemeral());
986        }
987
988        let request = ChatCompletionRequest {
989            model: self.model().to_string(),
990            messages,
991            response_format: Some(ResponseFormat {
992                format_type: "json_object".to_string(),
993                json_schema: None,
994            }),
995            max_tokens: Some(self.max_tokens()),
996            temperature: Some(self.temperature()),
997        };
998
999        // Send request and parse JSON with retry logic
1000        let (response, ai_stats, _finish_reasons) = self
1001            .send_and_parse::<super::types::PrLabelResponse>(&request)
1002            .await?;
1003
1004        debug!(
1005            label_count = response.suggested_labels.len(),
1006            input_tokens = ai_stats.input_tokens,
1007            output_tokens = ai_stats.output_tokens,
1008            duration_ms = ai_stats.duration_ms,
1009            "PR label suggestion complete with stats"
1010        );
1011
1012        Ok((response.suggested_labels, ai_stats))
1013    }
1014
1015    /// Builds the system prompt for PR review.
1016    #[must_use]
1017    fn build_pr_review_system_prompt(custom_guidance: Option<&str>) -> String {
1018        let context = super::context::load_custom_guidance(custom_guidance);
1019        build_pr_review_system_prompt(&context)
1020    }
1021
1022    /// Builds the user prompt for PR review.
1023    ///
1024    /// All user-controlled fields (title, body, filename, status, patch) are sanitized via
1025    /// [`sanitize_prompt_field`] before being written into the prompt to prevent prompt
1026    /// injection via XML tag smuggling.
1027    #[must_use]
1028    #[allow(clippy::too_many_lines)]
1029    fn build_pr_review_user_prompt(ctx: &mut crate::ai::review_context::ReviewContext) -> String {
1030        use std::fmt::Write;
1031
1032        let mut prompt = String::new();
1033
1034        prompt.push_str("<pull_request>\n");
1035        let _ = writeln!(prompt, "Title: {}\n", sanitize_prompt_field(&ctx.pr.title));
1036        let _ = writeln!(
1037            prompt,
1038            "Branch: {} -> {}\n",
1039            ctx.pr.head_branch, ctx.pr.base_branch
1040        );
1041
1042        // PR description - sanitize before truncation
1043        let sanitized_body = sanitize_prompt_field(&ctx.pr.body);
1044        let body = if sanitized_body.is_empty() {
1045            "[No description provided]".to_string()
1046        } else if sanitized_body.len() > MAX_BODY_LENGTH {
1047            format!(
1048                "{}...\n[APTU: description truncated by size budget -- do not speculate on missing content]",
1049                &sanitized_body[..MAX_BODY_LENGTH],
1050            )
1051        } else {
1052            sanitized_body
1053        };
1054        let _ = writeln!(prompt, "Description:\n{body}\n");
1055
1056        // File changes with limits
1057        prompt.push_str("Files Changed:\n");
1058        let mut total_diff_size = 0;
1059        let mut files_included = 0;
1060        let mut files_skipped = 0;
1061
1062        for i in 0..ctx.pr.files.len() {
1063            // Check file count limit
1064            if files_included >= MAX_FILES {
1065                files_skipped += 1;
1066                continue;
1067            }
1068
1069            let (filename, status, additions, deletions, patch, patch_truncated, full_content) = {
1070                let file = &ctx.pr.files[i];
1071                (
1072                    file.filename.clone(),
1073                    file.status.clone(),
1074                    file.additions,
1075                    file.deletions,
1076                    file.patch.clone(),
1077                    file.patch_truncated,
1078                    file.full_content.clone(),
1079                )
1080            };
1081
1082            let _ = writeln!(
1083                prompt,
1084                "- {} ({}) +{} -{}\n",
1085                sanitize_prompt_field(&filename),
1086                sanitize_prompt_field(&status),
1087                additions,
1088                deletions
1089            );
1090
1091            // Include patch if available (sanitize then truncate large patches)
1092            if let Some(patch) = patch {
1093                const MAX_PATCH_LENGTH: usize = 2000;
1094                let sanitized_patch = sanitize_prompt_field(&patch);
1095                let patch_content = if sanitized_patch.len() > MAX_PATCH_LENGTH {
1096                    format!(
1097                        "{}...\n[APTU: patch truncated by size budget -- do not speculate on missing content]",
1098                        &sanitized_patch[..MAX_PATCH_LENGTH],
1099                    )
1100                } else {
1101                    sanitized_patch
1102                };
1103
1104                // Check if adding this patch would exceed total diff size limit
1105                let patch_size = patch_content.len();
1106                if total_diff_size + patch_size > MAX_TOTAL_DIFF_SIZE {
1107                    let _ = writeln!(
1108                        prompt,
1109                        "```diff\n[APTU: patch omitted due to size budget -- do not speculate on missing content]\n```\n"
1110                    );
1111                    files_skipped += 1;
1112                    continue;
1113                }
1114
1115                // Add annotation if patch was truncated by GitHub API
1116                if patch_truncated {
1117                    let _ = writeln!(
1118                        prompt,
1119                        "[APTU: patch truncated by GitHub API -- do not speculate on missing content]\n```diff\n{patch_content}\n```\n"
1120                    );
1121                } else {
1122                    let _ = writeln!(prompt, "```diff\n{patch_content}\n```\n");
1123                }
1124                total_diff_size += patch_size;
1125            }
1126
1127            // Include full file content if available (cap at ctx.max_chars_per_file)
1128            if let Some(content) = full_content {
1129                let sanitized = sanitize_prompt_field(&content);
1130                let original_len = sanitized.len();
1131                let max_chars = ctx.max_chars_per_file;
1132                let is_truncated = original_len > max_chars;
1133                let displayed = if is_truncated {
1134                    let truncated = sanitized[..max_chars].to_string();
1135                    let truncated_len = truncated.len();
1136                    ctx.record_truncation(&filename, original_len, truncated_len);
1137                    truncated
1138                } else {
1139                    sanitized
1140                };
1141                let _ = writeln!(
1142                    prompt,
1143                    "<file_content path=\"{}\">\n{}\n</file_content>",
1144                    sanitize_prompt_field(&filename),
1145                    displayed
1146                );
1147                if is_truncated {
1148                    let _ = writeln!(
1149                        prompt,
1150                        "[APTU: file content truncated by size budget -- do not speculate on missing content]\n"
1151                    );
1152                } else {
1153                    let _ = writeln!(prompt);
1154                }
1155            }
1156
1157            files_included += 1;
1158        }
1159
1160        // Add truncation message if files were skipped
1161        if files_skipped > 0 {
1162            let _ = writeln!(
1163                prompt,
1164                "\n[{files_skipped} files omitted due to size limits (MAX_FILES={MAX_FILES}, MAX_TOTAL_DIFF_SIZE={MAX_TOTAL_DIFF_SIZE})]"
1165            );
1166        }
1167
1168        prompt.push_str("</pull_request>");
1169
1170        // Inject dependency release notes if available
1171        if !ctx.pr.dep_enrichments.is_empty() {
1172            prompt.push_str("\n<dependency_release_notes>\n");
1173            for dep in &ctx.pr.dep_enrichments {
1174                let _ = writeln!(
1175                    prompt,
1176                    "Package: {} ({})\nOld: {} -> New: {}\nGitHub: {}\n",
1177                    sanitize_prompt_field(&dep.package_name),
1178                    &dep.registry,
1179                    &dep.old_version,
1180                    &dep.new_version,
1181                    sanitize_prompt_field(&dep.github_url)
1182                );
1183                if !dep.body.is_empty() {
1184                    let _ = writeln!(
1185                        prompt,
1186                        "Release Notes:\n{}\n",
1187                        sanitize_prompt_field(&dep.body)
1188                    );
1189                } else if !dep.fetch_note.is_empty() {
1190                    let _ = writeln!(prompt, "Note: {}\n", &dep.fetch_note);
1191                }
1192            }
1193            prompt.push_str("</dependency_release_notes>\n");
1194        }
1195
1196        if !ctx.ast_context.is_empty() {
1197            prompt.push_str(&ctx.ast_context);
1198        }
1199        if !ctx.call_graph.is_empty() {
1200            prompt.push_str(&ctx.call_graph);
1201        }
1202        prompt.push_str(SCHEMA_PREAMBLE);
1203        prompt.push_str(crate::ai::prompts::PR_REVIEW_SCHEMA);
1204
1205        prompt
1206    }
1207
1208    /// Builds the system prompt for PR label suggestion.
1209    #[must_use]
1210    fn build_pr_label_system_prompt(custom_guidance: Option<&str>) -> String {
1211        let context = super::context::load_custom_guidance(custom_guidance);
1212        build_pr_label_system_prompt(&context)
1213    }
1214
1215    /// Builds the user prompt for PR label suggestion.
1216    #[must_use]
1217    fn build_pr_label_user_prompt(title: &str, body: &str, file_paths: &[String]) -> String {
1218        use std::fmt::Write;
1219
1220        let mut prompt = String::new();
1221
1222        // Sanitize title and body to prevent prompt injection
1223        let sanitized_title = sanitize_prompt_field(title);
1224        let sanitized_body = sanitize_prompt_field(body);
1225
1226        prompt.push_str("<pull_request>\n");
1227        let _ = writeln!(prompt, "Title: {sanitized_title}\n");
1228
1229        // PR description
1230        let body_content = if sanitized_body.is_empty() {
1231            "[No description provided]".to_string()
1232        } else if sanitized_body.len() > MAX_BODY_LENGTH {
1233            format!(
1234                "{}...\n[APTU: description truncated by size budget -- do not speculate on missing content]",
1235                &sanitized_body[..MAX_BODY_LENGTH],
1236            )
1237        } else {
1238            sanitized_body.clone()
1239        };
1240        let _ = writeln!(prompt, "Description:\n{body_content}\n");
1241
1242        // File paths
1243        if !file_paths.is_empty() {
1244            prompt.push_str("Files Changed:\n");
1245            for path in file_paths.iter().take(20) {
1246                let _ = writeln!(prompt, "- {path}");
1247            }
1248            if file_paths.len() > 20 {
1249                let _ = writeln!(prompt, "- ... and {} more files", file_paths.len() - 20);
1250            }
1251            prompt.push('\n');
1252        }
1253
1254        prompt.push_str("</pull_request>");
1255        prompt.push_str(SCHEMA_PREAMBLE);
1256        prompt.push_str(crate::ai::prompts::PR_LABEL_SCHEMA);
1257
1258        prompt
1259    }
1260}
1261
1262#[cfg(test)]
1263mod tests {
1264    use super::*;
1265
1266    /// Shared struct for `parse_ai_json` error-path tests.
1267    /// The field is only used via serde deserialization; `_message` silences `dead_code`.
1268    #[derive(Debug, serde::Deserialize)]
1269    struct ErrorTestResponse {
1270        _message: String,
1271    }
1272
1273    struct TestProvider;
1274
1275    impl AiProvider for TestProvider {
1276        fn name(&self) -> &'static str {
1277            "test"
1278        }
1279
1280        fn api_url(&self) -> &'static str {
1281            "https://test.example.com"
1282        }
1283
1284        fn api_key_env(&self) -> &'static str {
1285            "TEST_API_KEY"
1286        }
1287
1288        fn http_client(&self) -> &Client {
1289            unimplemented!()
1290        }
1291
1292        fn api_key(&self) -> &SecretString {
1293            unimplemented!()
1294        }
1295
1296        fn model(&self) -> &'static str {
1297            "test-model"
1298        }
1299
1300        fn max_tokens(&self) -> u32 {
1301            2048
1302        }
1303
1304        fn temperature(&self) -> f32 {
1305            0.3
1306        }
1307    }
1308
1309    #[test]
1310    fn test_build_system_prompt_contains_json_schema() {
1311        let system_prompt = TestProvider::build_system_prompt(None);
1312        // Schema description strings are unique to the schema file and must NOT appear in the
1313        // system prompt after moving schema injection to the user turn.
1314        assert!(
1315            !system_prompt
1316                .contains("A 2-3 sentence summary of what the issue is about and its impact")
1317        );
1318
1319        // Schema MUST appear in the user prompt
1320        let issue = IssueDetails::builder()
1321            .owner("test".to_string())
1322            .repo("repo".to_string())
1323            .number(1)
1324            .title("Test".to_string())
1325            .body("Body".to_string())
1326            .labels(vec![])
1327            .comments(vec![])
1328            .url("https://github.com/test/repo/issues/1".to_string())
1329            .build();
1330        let user_prompt = TestProvider::build_user_prompt(&issue);
1331        assert!(
1332            user_prompt
1333                .contains("A 2-3 sentence summary of what the issue is about and its impact")
1334        );
1335        assert!(user_prompt.contains("suggested_labels"));
1336    }
1337
1338    #[test]
1339    fn test_build_user_prompt_with_delimiters() {
1340        let issue = IssueDetails::builder()
1341            .owner("test".to_string())
1342            .repo("repo".to_string())
1343            .number(1)
1344            .title("Test issue".to_string())
1345            .body("This is the body".to_string())
1346            .labels(vec!["bug".to_string()])
1347            .comments(vec![])
1348            .url("https://github.com/test/repo/issues/1".to_string())
1349            .build();
1350
1351        let prompt = TestProvider::build_user_prompt(&issue);
1352        assert!(prompt.starts_with("<issue_content>"));
1353        assert!(prompt.contains("</issue_content>"));
1354        assert!(prompt.contains("Respond with valid JSON matching this schema"));
1355        assert!(prompt.contains("Title: Test issue"));
1356        assert!(prompt.contains("This is the body"));
1357        assert!(prompt.contains("Existing Labels: bug"));
1358    }
1359
1360    #[test]
1361    fn test_build_user_prompt_truncates_long_body() {
1362        let long_body = "x".repeat(5000);
1363        let issue = IssueDetails::builder()
1364            .owner("test".to_string())
1365            .repo("repo".to_string())
1366            .number(1)
1367            .title("Test".to_string())
1368            .body(long_body)
1369            .labels(vec![])
1370            .comments(vec![])
1371            .url("https://github.com/test/repo/issues/1".to_string())
1372            .build();
1373
1374        let prompt = TestProvider::build_user_prompt(&issue);
1375        assert!(prompt.contains(
1376            "[APTU: body truncated by size budget -- do not speculate on missing content]"
1377        ));
1378    }
1379
1380    #[test]
1381    fn test_build_user_prompt_empty_body() {
1382        let issue = IssueDetails::builder()
1383            .owner("test".to_string())
1384            .repo("repo".to_string())
1385            .number(1)
1386            .title("Test".to_string())
1387            .body(String::new())
1388            .labels(vec![])
1389            .comments(vec![])
1390            .url("https://github.com/test/repo/issues/1".to_string())
1391            .build();
1392
1393        let prompt = TestProvider::build_user_prompt(&issue);
1394        assert!(prompt.contains("[No description provided]"));
1395    }
1396
1397    #[test]
1398    fn test_build_create_system_prompt_contains_json_schema() {
1399        let system_prompt = TestProvider::build_create_system_prompt(None);
1400        // Schema description strings are unique to the schema file and must NOT appear in system prompt.
1401        assert!(
1402            !system_prompt
1403                .contains("Well-formatted issue title following conventional commit style")
1404        );
1405
1406        // Schema MUST appear in the user prompt
1407        let user_prompt =
1408            TestProvider::build_create_user_prompt("My title", "My body", "test/repo");
1409        assert!(
1410            user_prompt.contains("Well-formatted issue title following conventional commit style")
1411        );
1412        assert!(user_prompt.contains("formatted_body"));
1413    }
1414
1415    #[test]
1416    fn test_build_pr_review_user_prompt_respects_file_limit() {
1417        use super::super::types::{PrDetails, PrFile};
1418
1419        let mut files = Vec::new();
1420        for i in 0..25 {
1421            files.push(PrFile {
1422                filename: format!("file{i}.rs"),
1423                status: "modified".to_string(),
1424                additions: 10,
1425                deletions: 5,
1426                patch: Some(format!("patch content {i}")),
1427                patch_truncated: false,
1428                full_content: None,
1429            });
1430        }
1431
1432        let pr = PrDetails {
1433            owner: "test".to_string(),
1434            repo: "repo".to_string(),
1435            number: 1,
1436            title: "Test PR".to_string(),
1437            body: "Description".to_string(),
1438            head_branch: "feature".to_string(),
1439            base_branch: "main".to_string(),
1440            url: "https://github.com/test/repo/pull/1".to_string(),
1441            files,
1442            labels: vec![],
1443            head_sha: String::new(),
1444            review_comments: vec![],
1445            instructions: None,
1446            dep_enrichments: vec![],
1447        };
1448
1449        let prompt = TestProvider::build_pr_review_user_prompt(
1450            &mut crate::ai::review_context::ReviewContext {
1451                pr,
1452                ast_context: String::new(),
1453                call_graph: String::new(),
1454                inferred_repo_path: None,
1455                cwd_inferred: false,
1456                max_chars_per_file: 16_000,
1457                files_truncated: 0,
1458                truncated_chars_dropped: 0,
1459                ..Default::default()
1460            },
1461        );
1462        assert!(prompt.contains("files omitted due to size limits"));
1463        assert!(prompt.contains("MAX_FILES=20"));
1464    }
1465
1466    #[test]
1467    fn test_build_pr_review_user_prompt_respects_diff_size_limit() {
1468        use super::super::types::{PrDetails, PrFile};
1469
1470        // Create patches that will exceed the limit when combined
1471        // Each patch is ~30KB, so two will exceed 50KB limit
1472        let patch1 = "x".repeat(30_000);
1473        let patch2 = "y".repeat(30_000);
1474
1475        let files = vec![
1476            PrFile {
1477                filename: "file1.rs".to_string(),
1478                status: "modified".to_string(),
1479                additions: 100,
1480                deletions: 50,
1481                patch: Some(patch1),
1482                patch_truncated: false,
1483                full_content: None,
1484            },
1485            PrFile {
1486                filename: "file2.rs".to_string(),
1487                status: "modified".to_string(),
1488                additions: 100,
1489                deletions: 50,
1490                patch: Some(patch2),
1491                patch_truncated: false,
1492                full_content: None,
1493            },
1494        ];
1495
1496        let pr = PrDetails {
1497            owner: "test".to_string(),
1498            repo: "repo".to_string(),
1499            number: 1,
1500            title: "Test PR".to_string(),
1501            body: "Description".to_string(),
1502            head_branch: "feature".to_string(),
1503            base_branch: "main".to_string(),
1504            url: "https://github.com/test/repo/pull/1".to_string(),
1505            files,
1506            labels: vec![],
1507            head_sha: String::new(),
1508            review_comments: vec![],
1509            instructions: None,
1510            dep_enrichments: vec![],
1511        };
1512
1513        let prompt = TestProvider::build_pr_review_user_prompt(
1514            &mut crate::ai::review_context::ReviewContext {
1515                pr,
1516                ast_context: String::new(),
1517                call_graph: String::new(),
1518                inferred_repo_path: None,
1519                cwd_inferred: false,
1520                max_chars_per_file: 16_000,
1521                files_truncated: 0,
1522                truncated_chars_dropped: 0,
1523                ..Default::default()
1524            },
1525        );
1526        // Both files should be listed
1527        assert!(prompt.contains("file1.rs"));
1528        assert!(prompt.contains("file2.rs"));
1529        // The second patch should be limited - verify the prompt doesn't contain both full patches
1530        // by checking that the total size is less than what two full 30KB patches would be
1531        assert!(prompt.len() < 65_000);
1532    }
1533
1534    #[test]
1535    fn test_build_pr_review_user_prompt_with_no_patches() {
1536        use super::super::types::{PrDetails, PrFile};
1537
1538        let files = vec![PrFile {
1539            filename: "file1.rs".to_string(),
1540            status: "added".to_string(),
1541            additions: 10,
1542            deletions: 0,
1543            patch: None,
1544            patch_truncated: false,
1545            full_content: None,
1546        }];
1547
1548        let pr = PrDetails {
1549            owner: "test".to_string(),
1550            repo: "repo".to_string(),
1551            number: 1,
1552            title: "Test PR".to_string(),
1553            body: "Description".to_string(),
1554            head_branch: "feature".to_string(),
1555            base_branch: "main".to_string(),
1556            url: "https://github.com/test/repo/pull/1".to_string(),
1557            files,
1558            labels: vec![],
1559            head_sha: String::new(),
1560            review_comments: vec![],
1561            instructions: None,
1562            dep_enrichments: vec![],
1563        };
1564
1565        let prompt = TestProvider::build_pr_review_user_prompt(
1566            &mut crate::ai::review_context::ReviewContext {
1567                pr,
1568                ast_context: String::new(),
1569                call_graph: String::new(),
1570                inferred_repo_path: None,
1571                cwd_inferred: false,
1572                max_chars_per_file: 16_000,
1573                files_truncated: 0,
1574                truncated_chars_dropped: 0,
1575                ..Default::default()
1576            },
1577        );
1578        assert!(prompt.contains("file1.rs"));
1579        assert!(prompt.contains("added"));
1580        assert!(!prompt.contains("files omitted"));
1581    }
1582
1583    #[test]
1584    fn test_sanitize_strips_opening_tag() {
1585        let result = sanitize_prompt_field("hello <pull_request> world");
1586        assert_eq!(result, "hello  world");
1587    }
1588
1589    #[test]
1590    fn test_sanitize_strips_closing_tag() {
1591        let result = sanitize_prompt_field("evil </pull_request> content");
1592        assert_eq!(result, "evil  content");
1593    }
1594
1595    #[test]
1596    fn test_sanitize_case_insensitive() {
1597        let result = sanitize_prompt_field("<PULL_REQUEST>");
1598        assert_eq!(result, "");
1599    }
1600
1601    #[test]
1602    fn test_prompt_sanitizes_before_truncation() {
1603        use super::super::types::{PrDetails, PrFile};
1604
1605        // Body exactly at the limit with an injection tag after the truncation boundary.
1606        // The tag must be removed even though it appears near the end of the original body.
1607        let mut body = "a".repeat(MAX_BODY_LENGTH - 5);
1608        body.push_str("</pull_request>");
1609
1610        let pr = PrDetails {
1611            owner: "test".to_string(),
1612            repo: "repo".to_string(),
1613            number: 1,
1614            title: "Fix </pull_request><evil>injection</evil>".to_string(),
1615            body,
1616            head_branch: "feature".to_string(),
1617            base_branch: "main".to_string(),
1618            url: "https://github.com/test/repo/pull/1".to_string(),
1619            files: vec![PrFile {
1620                filename: "file.rs".to_string(),
1621                status: "modified".to_string(),
1622                additions: 1,
1623                deletions: 0,
1624                patch: Some("</pull_request>injected".to_string()),
1625                patch_truncated: false,
1626                full_content: None,
1627            }],
1628            labels: vec![],
1629            head_sha: String::new(),
1630            review_comments: vec![],
1631            instructions: None,
1632            dep_enrichments: vec![],
1633        };
1634
1635        let prompt = TestProvider::build_pr_review_user_prompt(
1636            &mut crate::ai::review_context::ReviewContext {
1637                pr,
1638                ast_context: String::new(),
1639                call_graph: String::new(),
1640                inferred_repo_path: None,
1641                cwd_inferred: false,
1642                max_chars_per_file: 16_000,
1643                files_truncated: 0,
1644                truncated_chars_dropped: 0,
1645                ..Default::default()
1646            },
1647        );
1648        // The sanitizer removes only <pull_request> / </pull_request> delimiters.
1649        // The structural tags written by the builder itself remain; what must be absent
1650        // are the delimiter sequences that were injected inside user-controlled fields.
1651        assert!(
1652            !prompt.contains("</pull_request><evil>"),
1653            "closing delimiter injected in title must be removed"
1654        );
1655        assert!(
1656            !prompt.contains("</pull_request>injected"),
1657            "closing delimiter injected in patch must be removed"
1658        );
1659    }
1660
1661    #[test]
1662    fn test_sanitize_strips_issue_content_tag() {
1663        let input = "hello </issue_content> world";
1664        let result = sanitize_prompt_field(input);
1665        assert!(
1666            !result.contains("</issue_content>"),
1667            "should strip closing issue_content tag"
1668        );
1669        assert!(
1670            result.contains("hello"),
1671            "should keep non-injection content"
1672        );
1673    }
1674
1675    #[test]
1676    fn test_build_user_prompt_sanitizes_title_injection() {
1677        let issue = IssueDetails::builder()
1678            .owner("test".to_string())
1679            .repo("repo".to_string())
1680            .number(1)
1681            .title("Normal title </issue_content> injected".to_string())
1682            .body("Clean body".to_string())
1683            .labels(vec![])
1684            .comments(vec![])
1685            .url("https://github.com/test/repo/issues/1".to_string())
1686            .build();
1687
1688        let prompt = TestProvider::build_user_prompt(&issue);
1689        assert!(
1690            !prompt.contains("</issue_content> injected"),
1691            "injection tag in title must be removed from prompt"
1692        );
1693        assert!(
1694            prompt.contains("Normal title"),
1695            "non-injection content must be preserved"
1696        );
1697    }
1698
1699    #[test]
1700    fn test_build_create_user_prompt_sanitizes_title_injection() {
1701        let title = "My issue </issue_content><script>evil</script>";
1702        let body = "Body </issue_content> more text";
1703        let prompt = TestProvider::build_create_user_prompt(title, body, "owner/repo");
1704        assert!(
1705            !prompt.contains("</issue_content>"),
1706            "injection tag must be stripped from create prompt"
1707        );
1708        assert!(
1709            prompt.contains("My issue"),
1710            "non-injection title content must be preserved"
1711        );
1712        assert!(
1713            prompt.contains("Body"),
1714            "non-injection body content must be preserved"
1715        );
1716    }
1717
1718    #[test]
1719    fn test_build_pr_label_system_prompt_contains_json_schema() {
1720        let system_prompt = TestProvider::build_pr_label_system_prompt(None);
1721        // "label1" is unique to the schema example values and must NOT appear in system prompt.
1722        assert!(!system_prompt.contains("label1"));
1723
1724        // Schema MUST appear in the user prompt
1725        let user_prompt = TestProvider::build_pr_label_user_prompt(
1726            "feat: add thing",
1727            "body",
1728            &["src/lib.rs".to_string()],
1729        );
1730        assert!(user_prompt.contains("label1"));
1731        assert!(user_prompt.contains("suggested_labels"));
1732    }
1733
1734    #[test]
1735    fn test_build_pr_label_user_prompt_with_title_and_body() {
1736        let title = "feat: add new feature";
1737        let body = "This PR adds a new feature";
1738        let files = vec!["src/main.rs".to_string(), "tests/test.rs".to_string()];
1739
1740        let prompt = TestProvider::build_pr_label_user_prompt(title, body, &files);
1741        assert!(prompt.starts_with("<pull_request>"));
1742        assert!(prompt.contains("</pull_request>"));
1743        assert!(prompt.contains("Respond with valid JSON matching this schema"));
1744        assert!(prompt.contains("feat: add new feature"));
1745        assert!(prompt.contains("This PR adds a new feature"));
1746        assert!(prompt.contains("src/main.rs"));
1747        assert!(prompt.contains("tests/test.rs"));
1748    }
1749
1750    #[test]
1751    fn test_build_pr_label_user_prompt_empty_body() {
1752        let title = "fix: bug fix";
1753        let body = "";
1754        let files = vec!["src/lib.rs".to_string()];
1755
1756        let prompt = TestProvider::build_pr_label_user_prompt(title, body, &files);
1757        assert!(prompt.contains("[No description provided]"));
1758        assert!(prompt.contains("src/lib.rs"));
1759    }
1760
1761    #[test]
1762    fn test_build_pr_label_user_prompt_truncates_long_body() {
1763        let title = "test";
1764        let long_body = "x".repeat(5000);
1765        let files = vec![];
1766
1767        let prompt = TestProvider::build_pr_label_user_prompt(title, &long_body, &files);
1768        assert!(prompt.contains(
1769            "[APTU: description truncated by size budget -- do not speculate on missing content]"
1770        ));
1771    }
1772
1773    #[test]
1774    fn test_build_pr_label_user_prompt_respects_file_limit() {
1775        let title = "test";
1776        let body = "test";
1777        let mut files = Vec::new();
1778        for i in 0..25 {
1779            files.push(format!("file{i}.rs"));
1780        }
1781
1782        let prompt = TestProvider::build_pr_label_user_prompt(title, body, &files);
1783        assert!(prompt.contains("file0.rs"));
1784        assert!(prompt.contains("file19.rs"));
1785        assert!(!prompt.contains("file20.rs"));
1786        assert!(prompt.contains("... and 5 more files"));
1787    }
1788
1789    #[test]
1790    fn test_build_pr_label_user_prompt_empty_files() {
1791        let title = "test";
1792        let body = "test";
1793        let files: Vec<String> = vec![];
1794
1795        let prompt = TestProvider::build_pr_label_user_prompt(title, body, &files);
1796        assert!(prompt.contains("Title: test"));
1797        assert!(prompt.contains("Description:\ntest"));
1798        assert!(!prompt.contains("Files Changed:"));
1799    }
1800
1801    #[test]
1802    fn test_parse_ai_json_with_valid_json() {
1803        #[derive(serde::Deserialize)]
1804        struct TestResponse {
1805            message: String,
1806        }
1807
1808        let json = r#"{"message": "hello"}"#;
1809        let result: Result<TestResponse> = parse_ai_json(json, "test-provider");
1810        assert!(result.is_ok());
1811        let response = result.unwrap();
1812        assert_eq!(response.message, "hello");
1813    }
1814
1815    #[test]
1816    fn test_parse_ai_json_with_truncated_json() {
1817        let json = r#"{"message": "hello"#;
1818        let result: Result<ErrorTestResponse> = parse_ai_json(json, "test-provider");
1819        assert!(result.is_err());
1820        let err = result.unwrap_err();
1821        assert!(
1822            err.to_string()
1823                .contains("Truncated response from test-provider")
1824        );
1825    }
1826
1827    #[test]
1828    fn test_parse_ai_json_with_malformed_json() {
1829        let json = r#"{"message": invalid}"#;
1830        let result: Result<ErrorTestResponse> = parse_ai_json(json, "test-provider");
1831        assert!(result.is_err());
1832        let err = result.unwrap_err();
1833        assert!(err.to_string().contains("Invalid JSON response from AI"));
1834    }
1835
1836    #[tokio::test]
1837    async fn test_load_system_prompt_override_returns_none_when_absent() {
1838        let result =
1839            super::super::context::load_system_prompt_override("__nonexistent_test_override__")
1840                .await;
1841        assert!(result.is_none());
1842    }
1843
1844    #[tokio::test]
1845    async fn test_load_system_prompt_override_returns_content_when_present() {
1846        use std::io::Write;
1847        let dir = tempfile::tempdir().expect("create tempdir");
1848        let file_path = dir.path().join("test_override.md");
1849        let mut f = std::fs::File::create(&file_path).expect("create file");
1850        writeln!(f, "Custom override content").expect("write file");
1851        drop(f);
1852
1853        let content = tokio::fs::read_to_string(&file_path).await.ok();
1854        assert_eq!(content.as_deref(), Some("Custom override content\n"));
1855    }
1856
1857    #[test]
1858    fn test_build_pr_review_prompt_omits_call_graph_when_oversized() {
1859        use super::super::types::{PrDetails, PrFile};
1860
1861        // Arrange: simulate review_pr dropping call_graph due to budget.
1862        // When call_graph is oversized, review_pr clears it before calling build_pr_review_user_prompt.
1863        let pr = PrDetails {
1864            owner: "test".to_string(),
1865            repo: "repo".to_string(),
1866            number: 1,
1867            title: "Budget drop test".to_string(),
1868            body: "body".to_string(),
1869            head_branch: "feat".to_string(),
1870            base_branch: "main".to_string(),
1871            url: "https://github.com/test/repo/pull/1".to_string(),
1872            files: vec![PrFile {
1873                filename: "lib.rs".to_string(),
1874                status: "modified".to_string(),
1875                additions: 1,
1876                deletions: 0,
1877                patch: Some("+line".to_string()),
1878                patch_truncated: false,
1879                full_content: None,
1880            }],
1881            labels: vec![],
1882            head_sha: String::new(),
1883            review_comments: vec![],
1884            instructions: None,
1885            dep_enrichments: vec![],
1886        };
1887
1888        // Act: call build_pr_review_user_prompt with empty call_graph (dropped by review_pr)
1889        // and non-empty ast_context (retained because it fits after call_graph drop)
1890        let ast_context = "Y".repeat(500);
1891        let call_graph = "";
1892        let mut ctx = crate::ai::review_context::ReviewContext {
1893            pr,
1894            ast_context: ast_context.clone(),
1895            call_graph: call_graph.to_string(),
1896            inferred_repo_path: None,
1897            cwd_inferred: false,
1898            max_chars_per_file: 16_000,
1899            files_truncated: 0,
1900            truncated_chars_dropped: 0,
1901            ..Default::default()
1902        };
1903        let prompt = TestProvider::build_pr_review_user_prompt(&mut ctx);
1904
1905        // Assert: call_graph absent, ast_context present
1906        assert!(
1907            !prompt.contains(&"X".repeat(10)),
1908            "call_graph content must not appear in prompt after budget drop"
1909        );
1910        assert!(
1911            prompt.contains(&"Y".repeat(10)),
1912            "ast_context content must appear in prompt (fits within budget)"
1913        );
1914    }
1915
1916    #[test]
1917    fn test_build_pr_review_prompt_omits_ast_after_call_graph() {
1918        use super::super::types::{PrDetails, PrFile};
1919
1920        // Arrange: simulate review_pr dropping both call_graph and ast_context due to budget.
1921        let pr = PrDetails {
1922            owner: "test".to_string(),
1923            repo: "repo".to_string(),
1924            number: 1,
1925            title: "Budget drop test".to_string(),
1926            body: "body".to_string(),
1927            head_branch: "feat".to_string(),
1928            base_branch: "main".to_string(),
1929            url: "https://github.com/test/repo/pull/1".to_string(),
1930            files: vec![PrFile {
1931                filename: "lib.rs".to_string(),
1932                status: "modified".to_string(),
1933                additions: 1,
1934                deletions: 0,
1935                patch: Some("+line".to_string()),
1936                patch_truncated: false,
1937                full_content: None,
1938            }],
1939            labels: vec![],
1940            head_sha: String::new(),
1941            review_comments: vec![],
1942            instructions: None,
1943            dep_enrichments: vec![],
1944        };
1945
1946        // Act: call build_pr_review_user_prompt with both empty (dropped by review_pr)
1947        let ast_context = "";
1948        let call_graph = "";
1949        let mut ctx = crate::ai::review_context::ReviewContext {
1950            pr,
1951            ast_context: ast_context.to_string(),
1952            call_graph: call_graph.to_string(),
1953            inferred_repo_path: None,
1954            cwd_inferred: false,
1955            max_chars_per_file: 16_000,
1956            files_truncated: 0,
1957            truncated_chars_dropped: 0,
1958            ..Default::default()
1959        };
1960        let prompt = TestProvider::build_pr_review_user_prompt(&mut ctx);
1961
1962        // Assert: both absent, PR title retained
1963        assert!(
1964            !prompt.contains(&"C".repeat(10)),
1965            "call_graph content must not appear after budget drop"
1966        );
1967        assert!(
1968            !prompt.contains(&"A".repeat(10)),
1969            "ast_context content must not appear after budget drop"
1970        );
1971        assert!(
1972            prompt.contains("Budget drop test"),
1973            "PR title must be retained in prompt"
1974        );
1975    }
1976
1977    #[test]
1978    fn test_build_pr_review_prompt_drops_patches_when_over_budget() {
1979        use super::super::types::{PrDetails, PrFile};
1980
1981        // Arrange: simulate review_pr dropping patches due to budget.
1982        // Create 3 files with patches of different sizes.
1983        let pr = PrDetails {
1984            owner: "test".to_string(),
1985            repo: "repo".to_string(),
1986            number: 1,
1987            title: "Patch drop test".to_string(),
1988            body: "body".to_string(),
1989            head_branch: "feat".to_string(),
1990            base_branch: "main".to_string(),
1991            url: "https://github.com/test/repo/pull/1".to_string(),
1992            files: vec![
1993                PrFile {
1994                    filename: "large.rs".to_string(),
1995                    status: "modified".to_string(),
1996                    additions: 100,
1997                    deletions: 50,
1998                    patch: Some("L".repeat(5000)),
1999                    patch_truncated: false,
2000                    full_content: None,
2001                },
2002                PrFile {
2003                    filename: "medium.rs".to_string(),
2004                    status: "modified".to_string(),
2005                    additions: 50,
2006                    deletions: 25,
2007                    patch: Some("M".repeat(3000)),
2008                    patch_truncated: false,
2009                    full_content: None,
2010                },
2011                PrFile {
2012                    filename: "small.rs".to_string(),
2013                    status: "modified".to_string(),
2014                    additions: 10,
2015                    deletions: 5,
2016                    patch: Some("S".repeat(1000)),
2017                    patch_truncated: false,
2018                    full_content: None,
2019                },
2020            ],
2021            labels: vec![],
2022            head_sha: String::new(),
2023            review_comments: vec![],
2024            instructions: None,
2025            dep_enrichments: vec![],
2026        };
2027
2028        // Act: simulate review_pr dropping largest patches first
2029        let mut pr_mut = pr.clone();
2030        pr_mut.files[0].patch = None; // Drop largest patch
2031        pr_mut.files[1].patch = None; // Drop medium patch
2032        // Keep smallest patch
2033
2034        let ast_context = "";
2035        let call_graph = "";
2036        let mut ctx = crate::ai::review_context::ReviewContext {
2037            pr: pr_mut,
2038            ast_context: ast_context.to_string(),
2039            call_graph: call_graph.to_string(),
2040            inferred_repo_path: None,
2041            cwd_inferred: false,
2042            max_chars_per_file: 16_000,
2043            files_truncated: 0,
2044            truncated_chars_dropped: 0,
2045            ..Default::default()
2046        };
2047        let prompt = TestProvider::build_pr_review_user_prompt(&mut ctx);
2048
2049        // Assert: largest patches absent, smallest present
2050        assert!(
2051            !prompt.contains(&"L".repeat(10)),
2052            "largest patch must be absent after drop"
2053        );
2054        assert!(
2055            !prompt.contains(&"M".repeat(10)),
2056            "medium patch must be absent after drop"
2057        );
2058        assert!(
2059            prompt.contains(&"S".repeat(10)),
2060            "smallest patch must be present"
2061        );
2062    }
2063
2064    #[test]
2065    fn test_build_pr_review_prompt_drops_full_content_as_last_resort() {
2066        use super::super::types::{PrDetails, PrFile};
2067
2068        // Arrange: simulate review_pr dropping full_content as last resort.
2069        let pr = PrDetails {
2070            owner: "test".to_string(),
2071            repo: "repo".to_string(),
2072            number: 1,
2073            title: "Full content drop test".to_string(),
2074            body: "body".to_string(),
2075            head_branch: "feat".to_string(),
2076            base_branch: "main".to_string(),
2077            url: "https://github.com/test/repo/pull/1".to_string(),
2078            files: vec![
2079                PrFile {
2080                    filename: "file1.rs".to_string(),
2081                    status: "modified".to_string(),
2082                    additions: 10,
2083                    deletions: 5,
2084                    patch: None,
2085                    patch_truncated: false,
2086                    full_content: Some("F".repeat(5000)),
2087                },
2088                PrFile {
2089                    filename: "file2.rs".to_string(),
2090                    status: "modified".to_string(),
2091                    additions: 10,
2092                    deletions: 5,
2093                    patch: None,
2094                    patch_truncated: false,
2095                    full_content: Some("C".repeat(3000)),
2096                },
2097            ],
2098            labels: vec![],
2099            head_sha: String::new(),
2100            review_comments: vec![],
2101            instructions: None,
2102            dep_enrichments: vec![],
2103        };
2104
2105        // Act: simulate review_pr dropping all full_content
2106        let mut pr_mut = pr.clone();
2107        for file in &mut pr_mut.files {
2108            file.full_content = None;
2109        }
2110
2111        let ast_context = "";
2112        let call_graph = "";
2113        let mut ctx = crate::ai::review_context::ReviewContext {
2114            pr: pr_mut,
2115            ast_context: ast_context.to_string(),
2116            call_graph: call_graph.to_string(),
2117            inferred_repo_path: None,
2118            cwd_inferred: false,
2119            max_chars_per_file: 16_000,
2120            files_truncated: 0,
2121            truncated_chars_dropped: 0,
2122            ..Default::default()
2123        };
2124        let prompt = TestProvider::build_pr_review_user_prompt(&mut ctx);
2125
2126        // Assert: no file_content XML blocks appear
2127        assert!(
2128            !prompt.contains("<file_content"),
2129            "file_content blocks must not appear when full_content is cleared"
2130        );
2131        assert!(
2132            !prompt.contains(&"F".repeat(10)),
2133            "full_content from file1 must not appear"
2134        );
2135        assert!(
2136            !prompt.contains(&"C".repeat(10)),
2137            "full_content from file2 must not appear"
2138        );
2139    }
2140
2141    #[test]
2142    fn test_redact_api_error_body_truncates() {
2143        // Arrange: Create a long error body
2144        let long_body = "x".repeat(300);
2145
2146        // Act: Redact the error body
2147        let result = redact_api_error_body(&long_body);
2148
2149        // Assert: Result should be truncated and marked
2150        assert!(result.len() < long_body.len());
2151        assert!(result.ends_with("[truncated]"));
2152        assert_eq!(result.len(), 200 + " [truncated]".len());
2153    }
2154
2155    #[test]
2156    fn test_redact_api_error_body_short() {
2157        // Arrange: Create a short error body
2158        let short_body = "Short error";
2159
2160        // Act: Redact the error body
2161        let result = redact_api_error_body(short_body);
2162
2163        // Assert: Result should be unchanged
2164        assert_eq!(result, short_body);
2165    }
2166
2167    #[test]
2168    fn test_full_content_truncation_annotation_added() {
2169        use super::super::types::{PrDetails, PrFile};
2170
2171        // Arrange: PR with file content that will be truncated
2172        let pr = PrDetails {
2173            owner: "test".to_string(),
2174            repo: "repo".to_string(),
2175            number: 1,
2176            title: "Test PR".to_string(),
2177            body: "body".to_string(),
2178            head_branch: "feat".to_string(),
2179            base_branch: "main".to_string(),
2180            url: "https://github.com/test/repo/pull/1".to_string(),
2181            files: vec![PrFile {
2182                filename: "large_file.rs".to_string(),
2183                status: "modified".to_string(),
2184                additions: 10,
2185                deletions: 5,
2186                patch: Some("--- a/file\n+++ b/file\n@@ -1 @@\n+added".to_string()),
2187                patch_truncated: false,
2188                full_content: Some("x".repeat(10000)), // Will be truncated
2189            }],
2190            labels: vec![],
2191            head_sha: String::new(),
2192            review_comments: vec![],
2193            instructions: None,
2194            dep_enrichments: vec![],
2195        };
2196
2197        // Act: build prompt with cap below content size to trigger truncation
2198        let prompt = TestProvider::build_pr_review_user_prompt(
2199            &mut crate::ai::review_context::ReviewContext {
2200                pr,
2201                ast_context: String::new(),
2202                call_graph: String::new(),
2203                inferred_repo_path: None,
2204                cwd_inferred: false,
2205                max_chars_per_file: 4_000,
2206                files_truncated: 0,
2207                truncated_chars_dropped: 0,
2208                ..Default::default()
2209            },
2210        );
2211
2212        // Assert: truncation annotation is present outside file_content tags
2213        assert!(
2214            prompt.contains("[APTU: file content truncated by size budget -- do not speculate on missing content]"),
2215            "truncation annotation must be present for truncated full_content"
2216        );
2217        // Verify annotation is outside the XML tags
2218        let file_content_end = prompt
2219            .find("</file_content>")
2220            .expect("file_content tags must exist");
2221        let annotation_pos = prompt
2222            .find("[APTU: file content truncated")
2223            .expect("annotation must exist");
2224        assert!(
2225            annotation_pos > file_content_end,
2226            "annotation must be outside </file_content> tags"
2227        );
2228    }
2229
2230    #[test]
2231    fn test_all_truncation_annotations_consistent_format() {
2232        use super::super::types::{IssueDetails, PrDetails, PrFile};
2233
2234        // Arrange: issue with truncated body
2235        let issue = IssueDetails::builder()
2236            .owner("test".to_string())
2237            .repo("repo".to_string())
2238            .number(1)
2239            .title("Test Issue".to_string())
2240            .body("x".repeat(40000)) // Will be truncated
2241            .labels(vec![])
2242            .url("https://github.com/test/repo/issues/1".to_string())
2243            .comments(vec![])
2244            .build();
2245
2246        // Act: build triage prompt
2247        let prompt = TestProvider::build_user_prompt(&issue);
2248
2249        // Assert: body truncation uses consistent format
2250        assert!(
2251            prompt.contains(
2252                "[APTU: body truncated by size budget -- do not speculate on missing content]"
2253            ),
2254            "body truncation must use [APTU: ...] format"
2255        );
2256
2257        // Arrange: PR with truncated description and patch
2258        let pr = PrDetails {
2259            owner: "test".to_string(),
2260            repo: "repo".to_string(),
2261            number: 1,
2262            title: "Test PR".to_string(),
2263            body: "x".repeat(40000), // Will be truncated
2264            head_branch: "feat".to_string(),
2265            base_branch: "main".to_string(),
2266            url: "https://github.com/test/repo/pull/1".to_string(),
2267            files: vec![
2268                PrFile {
2269                    filename: "file1.rs".to_string(),
2270                    status: "modified".to_string(),
2271                    additions: 10,
2272                    deletions: 5,
2273                    patch: Some("x".repeat(3000)), // Will be truncated
2274                    patch_truncated: false,
2275                    full_content: None,
2276                },
2277                PrFile {
2278                    filename: "file2.rs".to_string(),
2279                    status: "modified".to_string(),
2280                    additions: 10,
2281                    deletions: 5,
2282                    patch: Some("--- a/file\n+++ b/file\n@@ -1 @@\n+added".to_string()),
2283                    patch_truncated: true, // GitHub API truncated
2284                    full_content: None,
2285                },
2286            ],
2287            labels: vec![],
2288            head_sha: String::new(),
2289            review_comments: vec![],
2290            instructions: None,
2291            dep_enrichments: vec![],
2292        };
2293
2294        // Act: build review prompt
2295        let prompt = TestProvider::build_pr_review_user_prompt(
2296            &mut crate::ai::review_context::ReviewContext {
2297                pr,
2298                ast_context: String::new(),
2299                call_graph: String::new(),
2300                inferred_repo_path: None,
2301                cwd_inferred: false,
2302                max_chars_per_file: 16_000,
2303                files_truncated: 0,
2304                truncated_chars_dropped: 0,
2305                ..Default::default()
2306            },
2307        );
2308
2309        // Assert: all truncation annotations use consistent [APTU: ...] format
2310        assert!(
2311            prompt.contains("[APTU: description truncated by size budget -- do not speculate on missing content]"),
2312            "description truncation must use [APTU: ...] format"
2313        );
2314        assert!(
2315            prompt.contains(
2316                "[APTU: patch truncated by size budget -- do not speculate on missing content]"
2317            ),
2318            "patch budget truncation must use [APTU: ...] format"
2319        );
2320        assert!(
2321            prompt.contains(
2322                "[APTU: patch truncated by GitHub API -- do not speculate on missing content]"
2323            ),
2324            "GitHub API patch truncation must use [APTU: ...] format"
2325        );
2326    }
2327
2328    #[test]
2329    fn test_no_dep_enrichment_when_no_manifest_files() {
2330        use super::super::types::{PrDetails, PrFile};
2331
2332        // Arrange: PR with no manifest files (regression guard)
2333        let pr = PrDetails {
2334            owner: "test".to_string(),
2335            repo: "repo".to_string(),
2336            number: 1,
2337            title: "Test PR".to_string(),
2338            body: "Fix bug in parser".to_string(),
2339            head_branch: "feat".to_string(),
2340            base_branch: "main".to_string(),
2341            url: "https://github.com/test/repo/pull/1".to_string(),
2342            files: vec![PrFile {
2343                filename: "src/parser.rs".to_string(),
2344                status: "modified".to_string(),
2345                additions: 10,
2346                deletions: 5,
2347                patch: Some("--- a/src/parser.rs\n+++ b/src/parser.rs\n@@ -1 @@\n+fix".to_string()),
2348                patch_truncated: false,
2349                full_content: None,
2350            }],
2351            labels: vec![],
2352            head_sha: String::new(),
2353            review_comments: vec![],
2354            instructions: None,
2355            dep_enrichments: vec![],
2356        };
2357
2358        // Act: build review prompt
2359        let prompt = TestProvider::build_pr_review_user_prompt(
2360            &mut crate::ai::review_context::ReviewContext {
2361                pr,
2362                ast_context: String::new(),
2363                call_graph: String::new(),
2364                inferred_repo_path: None,
2365                cwd_inferred: false,
2366                max_chars_per_file: 16_000,
2367                files_truncated: 0,
2368                truncated_chars_dropped: 0,
2369                ..Default::default()
2370            },
2371        );
2372
2373        // Assert: no dependency_release_notes block when no manifest files changed
2374        assert!(
2375            !prompt.contains("<dependency_release_notes>"),
2376            "prompt must not contain dependency_release_notes block when no manifest files changed"
2377        );
2378    }
2379
2380    #[test]
2381    fn test_dep_enrichment_injected_after_pull_request_tag() {
2382        use super::super::types::{DepReleaseNote, PrDetails, PrFile};
2383
2384        // Arrange: PR with dependency enrichments
2385        let pr = PrDetails {
2386            owner: "test".to_string(),
2387            repo: "repo".to_string(),
2388            number: 1,
2389            title: "Bump tokio".to_string(),
2390            body: "Update tokio to 1.40".to_string(),
2391            head_branch: "feat".to_string(),
2392            base_branch: "main".to_string(),
2393            url: "https://github.com/test/repo/pull/1".to_string(),
2394            files: vec![PrFile {
2395                filename: "Cargo.toml".to_string(),
2396                status: "modified".to_string(),
2397                additions: 1,
2398                deletions: 1,
2399                patch: Some("--- a/Cargo.toml\n+++ b/Cargo.toml\n@@ -1 @@\n-tokio = \"1.39\"\n+tokio = \"1.40\"".to_string()),
2400                patch_truncated: false,
2401                full_content: None,
2402            }],
2403            labels: vec![],
2404            head_sha: String::new(),
2405            review_comments: vec![],
2406            instructions: None,
2407            dep_enrichments: vec![DepReleaseNote {
2408                package_name: "tokio".to_string(),
2409                old_version: "1.39".to_string(),
2410                new_version: "1.40".to_string(),
2411                registry: "crates.io".to_string(),
2412                github_url: "https://github.com/tokio-rs/tokio".to_string(),
2413                body: "Bug fixes and performance improvements".to_string(),
2414                fetch_note: String::new(),
2415            }],
2416        };
2417
2418        // Act: build review prompt
2419        let prompt = TestProvider::build_pr_review_user_prompt(
2420            &mut crate::ai::review_context::ReviewContext {
2421                pr,
2422                ast_context: String::new(),
2423                call_graph: String::new(),
2424                inferred_repo_path: None,
2425                cwd_inferred: false,
2426                max_chars_per_file: 16_000,
2427                files_truncated: 0,
2428                truncated_chars_dropped: 0,
2429                ..Default::default()
2430            },
2431        );
2432
2433        // Assert: dependency_release_notes block injected after </pull_request>
2434        let pull_request_end = prompt
2435            .find("</pull_request>")
2436            .expect("must contain </pull_request>");
2437        let dep_notes_start = prompt
2438            .find("<dependency_release_notes>")
2439            .expect("must contain <dependency_release_notes>");
2440        assert!(
2441            dep_notes_start > pull_request_end,
2442            "dependency_release_notes must be injected after </pull_request>"
2443        );
2444        assert!(prompt.contains("tokio"), "prompt must contain package name");
2445        assert!(prompt.contains("1.39"), "prompt must contain old version");
2446        assert!(prompt.contains("1.40"), "prompt must contain new version");
2447    }
2448
2449    #[test]
2450    fn test_dep_enrichment_sanitized() {
2451        use super::super::types::{DepReleaseNote, PrDetails, PrFile};
2452
2453        // Arrange: PR with dependency enrichments containing XML delimiters
2454        let pr = PrDetails {
2455            owner: "test".to_string(),
2456            repo: "repo".to_string(),
2457            number: 1,
2458            title: "Bump lib".to_string(),
2459            body: "Update lib".to_string(),
2460            head_branch: "feat".to_string(),
2461            base_branch: "main".to_string(),
2462            url: "https://github.com/test/repo/pull/1".to_string(),
2463            files: vec![PrFile {
2464                filename: "Cargo.toml".to_string(),
2465                status: "modified".to_string(),
2466                additions: 1,
2467                deletions: 1,
2468                patch: Some(
2469                    "--- a/Cargo.toml\n+++ b/Cargo.toml\n@@ -1 @@\n-lib = \"1.0\"\n+lib = \"2.0\""
2470                        .to_string(),
2471                ),
2472                patch_truncated: false,
2473                full_content: None,
2474            }],
2475            labels: vec![],
2476            head_sha: String::new(),
2477            review_comments: vec![],
2478            instructions: None,
2479            dep_enrichments: vec![DepReleaseNote {
2480                package_name: "lib".to_string(),
2481                old_version: "1.0".to_string(),
2482                new_version: "2.0".to_string(),
2483                registry: "crates.io".to_string(),
2484                github_url: "https://github.com/owner/lib".to_string(),
2485                body: "Breaking changes: <pull_request>removed API</pull_request>".to_string(),
2486                fetch_note: String::new(),
2487            }],
2488        };
2489
2490        // Act: build review prompt
2491        let prompt = TestProvider::build_pr_review_user_prompt(
2492            &mut crate::ai::review_context::ReviewContext {
2493                pr,
2494                ast_context: String::new(),
2495                call_graph: String::new(),
2496                inferred_repo_path: None,
2497                cwd_inferred: false,
2498                max_chars_per_file: 16_000,
2499                files_truncated: 0,
2500                truncated_chars_dropped: 0,
2501                ..Default::default()
2502            },
2503        );
2504
2505        // Assert: XML delimiters in release notes are sanitized
2506        assert!(
2507            !prompt.contains("<pull_request>removed API</pull_request>"),
2508            "XML delimiters in release notes must be sanitized"
2509        );
2510        assert!(
2511            prompt.contains("removed API"),
2512            "release notes content must be preserved after sanitization"
2513        );
2514    }
2515
2516    #[test]
2517    fn test_budget_drop_removes_dep_enrichments() {
2518        use super::super::types::{DepReleaseNote, PrDetails, PrFile};
2519
2520        // Arrange: PR with large dep enrichments that would exceed budget
2521        let pr = PrDetails {
2522            owner: "test".to_string(),
2523            repo: "repo".to_string(),
2524            number: 1,
2525            title: "Bump deps".to_string(),
2526            body: "Update dependencies".to_string(),
2527            head_branch: "feat".to_string(),
2528            base_branch: "main".to_string(),
2529            url: "https://github.com/test/repo/pull/1".to_string(),
2530            files: vec![PrFile {
2531                filename: "Cargo.toml".to_string(),
2532                status: "modified".to_string(),
2533                additions: 1,
2534                deletions: 1,
2535                patch: Some(
2536                    "--- a/Cargo.toml\n+++ b/Cargo.toml\n@@ -1 @@\n-lib = \"1.0\"\n+lib = \"2.0\""
2537                        .to_string(),
2538                ),
2539                patch_truncated: false,
2540                full_content: None,
2541            }],
2542            labels: vec![],
2543            head_sha: String::new(),
2544            review_comments: vec![],
2545            instructions: None,
2546            dep_enrichments: vec![DepReleaseNote {
2547                package_name: "lib".to_string(),
2548                old_version: "1.0".to_string(),
2549                new_version: "2.0".to_string(),
2550                registry: "crates.io".to_string(),
2551                github_url: "https://github.com/owner/lib".to_string(),
2552                body: "Release notes".to_string(),
2553                fetch_note: String::new(),
2554            }],
2555        };
2556
2557        // Act: build review prompt
2558        let prompt = TestProvider::build_pr_review_user_prompt(
2559            &mut crate::ai::review_context::ReviewContext {
2560                pr,
2561                ast_context: String::new(),
2562                call_graph: String::new(),
2563                inferred_repo_path: None,
2564                cwd_inferred: false,
2565                max_chars_per_file: 16_000,
2566                files_truncated: 0,
2567                truncated_chars_dropped: 0,
2568                ..Default::default()
2569            },
2570        );
2571
2572        // Assert: dep_enrichments are present in prompt when not over budget
2573        assert!(
2574            prompt.contains("<dependency_release_notes>"),
2575            "dependency_release_notes block should be present"
2576        );
2577        assert!(prompt.contains("lib"), "package name should be in prompt");
2578    }
2579}
aptu_core/ai/provider.rs

aptu_core/ai/
provider.rs