Skip to main content

ralph_workflow/prompts/
content_reference.rs

1//! Content reference types for prompt templates.
2//!
3//! When prompt content (PROMPT, DIFF, PLAN) exceeds size limits, we reference
4//! the content by file path instead of embedding it inline. This prevents
5//! CLI argument limits from being exceeded while still providing agents with
6//! access to all necessary information.
7
8use std::path::{Path, PathBuf};
9
/// Upper bound, in bytes, on content that may be embedded inline in a prompt.
///
/// Anything larger than this is handed over as a file path reference instead.
///
/// The value is 100KB, which stays well under both:
/// - the macOS `ARG_MAX` limit (~1MB)
/// - the Linux per-argument limit (~128KB)
///
/// Keeping the threshold conservative keeps it safe across platforms.
pub const MAX_INLINE_CONTENT_SIZE: usize = 1024 * 100; // 100KB
19
20/// Represents content that can be either inline or referenced by path.
21///
22/// When content is small enough, it's embedded directly in the prompt.
23/// When content exceeds [`MAX_INLINE_CONTENT_SIZE`], instructions are
24/// provided to the agent to read the content from a file.
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub enum PromptContentReference {
27    /// Content is small enough to embed inline in the prompt.
28    Inline(String),
29    /// Content is too large; agent should read from this workspace-relative path.
30    FilePath {
31        /// Workspace-relative path to the backup file containing the content.
32        path: PathBuf,
33        /// Human-readable description of what the content contains.
34        description: String,
35    },
36}
37
38impl PromptContentReference {
39    /// Create a content reference, choosing inline vs path based on size.
40    ///
41    /// If `content.len() <= MAX_INLINE_CONTENT_SIZE`, the content is stored inline.
42    /// Otherwise, a file path reference is created.
43    ///
44    /// # Arguments
45    ///
46    /// * `content` - The content to reference
47    /// * `backup_path` - Path where the content can be read if too large
48    /// * `description` - Description of the content for agent instructions
49    pub fn from_content(content: String, backup_path: &Path, description: &str) -> Self {
50        if content.len() <= MAX_INLINE_CONTENT_SIZE {
51            Self::Inline(content)
52        } else {
53            Self::FilePath {
54                path: backup_path.to_path_buf(),
55                description: description.to_string(),
56            }
57        }
58    }
59
60    /// Create an inline reference (for small content).
61    pub fn inline(content: String) -> Self {
62        Self::Inline(content)
63    }
64
65    /// Create a file path reference (for large content).
66    pub fn file_path(path: PathBuf, description: &str) -> Self {
67        Self::FilePath {
68            path,
69            description: description.to_string(),
70        }
71    }
72
73    /// Returns true if this is an inline reference.
74    pub fn is_inline(&self) -> bool {
75        matches!(self, Self::Inline(_))
76    }
77
78    /// Get the content for template rendering.
79    ///
80    /// For inline: returns the content directly.
81    /// For file path: returns instructions to read from the file.
82    pub fn render_for_template(&self) -> String {
83        match self {
84            Self::Inline(content) => content.clone(),
85            Self::FilePath { path, description } => {
86                format!(
87                    "[Content too large to embed - Read from: {}]\n\
88                     Description: {}\n\
89                     Use your file reading tools to access this file.",
90                    path.display(),
91                    description
92                )
93            }
94        }
95    }
96}
97
98/// Specialized reference for DIFF content.
99///
100/// When DIFF is too large, the pipeline prefers writing the full diff to a file so
101/// agents can read it without invoking git. Some prompts (e.g., review) may include
102/// git-based fallback instructions as a last resort.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub enum DiffContentReference {
105    /// DIFF is small enough to embed inline.
106    Inline(String),
107    /// DIFF is too large; agent should read from a file (with optional git fallback).
108    ReadFromFile {
109        /// Workspace-relative path to the diff file containing the content.
110        path: PathBuf,
111        /// The commit hash to diff from (fallback if file is missing).
112        start_commit: String,
113        /// Description of why file reading is needed.
114        description: String,
115    },
116}
117
118impl DiffContentReference {
119    /// Create a diff reference, choosing inline vs file reference based on size.
120    ///
121    /// If `diff_content.len() <= MAX_INLINE_CONTENT_SIZE`, the diff is stored inline.
122    /// Otherwise, instructions to read from a file are provided.
123    ///
124    /// # Arguments
125    ///
126    /// * `diff_content` - The diff content
127    /// * `start_commit` - The commit hash to diff from
128    pub fn from_diff(diff_content: String, start_commit: &str, diff_path: &Path) -> Self {
129        if diff_content.len() <= MAX_INLINE_CONTENT_SIZE {
130            Self::Inline(diff_content)
131        } else {
132            Self::ReadFromFile {
133                path: diff_path.to_path_buf(),
134                start_commit: start_commit.to_string(),
135                description: format!(
136                    "Diff is {} bytes (exceeds {} limit)",
137                    diff_content.len(),
138                    MAX_INLINE_CONTENT_SIZE
139                ),
140            }
141        }
142    }
143
144    /// Get the content for template rendering.
145    ///
146    /// For inline: returns the diff content directly.
147    /// For file reference: returns instructions to read from the provided path,
148    /// plus optional git fallback commands.
149    pub fn render_for_template(&self) -> String {
150        match self {
151            Self::Inline(content) => content.clone(),
152            Self::ReadFromFile {
153                path,
154                start_commit,
155                description,
156            } => {
157                if start_commit.is_empty() {
158                    format!(
159                        "[DIFF too large to embed - Read from file]\n\
160                         {}\n\n\
161                         Read the diff from: {}\n\
162                         If this file is missing or unavailable, regenerate it with git (last resort):\n\
163                         - Unstaged changes: git diff\n\
164                         - Staged changes:   git diff --cached\n\
165                         - Untracked files:  git ls-files --others --exclude-standard\n",
166                        description,
167                        path.display(),
168                    )
169                } else {
170                    format!(
171                        "[DIFF too large to embed - Read from file]\n\
172                         {}\n\n\
173                         Read the diff from: {}\n\
174                         If this file is missing or unavailable, regenerate it with git (last resort):\n\
175                         - Unstaged changes: git diff {}\n\
176                         - Staged changes:   git diff --cached {}\n\
177                         - Untracked files:  git ls-files --others --exclude-standard\n",
178                        description,
179                        path.display(),
180                        start_commit,
181                        start_commit,
182                    )
183                }
184            }
185        }
186    }
187
188    /// Returns true if this is an inline reference.
189    pub fn is_inline(&self) -> bool {
190        matches!(self, Self::Inline(_))
191    }
192}
193
194/// Specialized reference for PLAN content.
195///
196/// When PLAN is too large, instructs the agent to read from PLAN.md
197/// with optional fallback to the XML plan file.
198#[derive(Debug, Clone, PartialEq, Eq)]
199pub enum PlanContentReference {
200    /// PLAN is small enough to embed inline.
201    Inline(String),
202    /// PLAN is too large; agent should read from file.
203    ReadFromFile {
204        /// Primary path to the plan file (usually .agent/PLAN.md), workspace-relative.
205        primary_path: PathBuf,
206        /// Optional fallback path if primary is missing (usually .agent/tmp/plan.xml), workspace-relative.
207        fallback_path: Option<PathBuf>,
208        /// Description of why file reading is needed.
209        description: String,
210    },
211}
212
213impl PlanContentReference {
214    /// Create a plan reference, choosing inline vs file path based on size.
215    ///
216    /// If `plan_content.len() <= MAX_INLINE_CONTENT_SIZE`, the plan is stored inline.
217    /// Otherwise, instructions to read from file are provided.
218    ///
219    /// # Arguments
220    ///
221    /// * `plan_content` - The plan content
222    /// * `plan_path` - Path to the primary plan file
223    /// * `xml_fallback_path` - Optional path to XML fallback
224    pub fn from_plan(
225        plan_content: String,
226        plan_path: &Path,
227        xml_fallback_path: Option<&Path>,
228    ) -> Self {
229        if plan_content.len() <= MAX_INLINE_CONTENT_SIZE {
230            Self::Inline(plan_content)
231        } else {
232            Self::ReadFromFile {
233                primary_path: plan_path.to_path_buf(),
234                fallback_path: xml_fallback_path.map(|p| p.to_path_buf()),
235                description: format!(
236                    "Plan is {} bytes (exceeds {} limit)",
237                    plan_content.len(),
238                    MAX_INLINE_CONTENT_SIZE
239                ),
240            }
241        }
242    }
243
244    /// Get the content for template rendering.
245    ///
246    /// For inline: returns the plan content directly.
247    /// For file path: returns instructions to read from the file.
248    pub fn render_for_template(&self) -> String {
249        match self {
250            Self::Inline(content) => content.clone(),
251            Self::ReadFromFile {
252                primary_path,
253                fallback_path,
254                description,
255            } => {
256                let fallback_msg = fallback_path.as_ref().map_or(String::new(), |p| {
257                    format!(
258                        "\nIf {} is missing or empty, try reading: {}",
259                        primary_path.display(),
260                        p.display()
261                    )
262                });
263                format!(
264                    "[PLAN too large to embed - Read from file]\n\
265                     {}\n\n\
266                     Read the implementation plan from: {}{}\n\n\
267                     Use your file reading tools to access the plan.",
268                    description,
269                    primary_path.display(),
270                    fallback_msg
271                )
272            }
273        }
274    }
275
276    /// Returns true if this is an inline reference.
277    pub fn is_inline(&self) -> bool {
278        matches!(self, Self::Inline(_))
279    }
280}
281
#[cfg(test)]
mod tests {
    // Unit tests for the three content reference types. Each group covers the
    // inline/file decision at and around `MAX_INLINE_CONTENT_SIZE` and checks
    // the text produced by `render_for_template`.
    use super::*;

    // =========================================================================
    // PromptContentReference tests
    // =========================================================================

    #[test]
    fn test_small_content_is_inline() {
        let content = "Small content".to_string();
        let reference = PromptContentReference::from_content(
            content.clone(),
            Path::new("/backup/path"),
            "test",
        );
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), content);
    }

    #[test]
    fn test_large_content_becomes_file_path() {
        let content = "x".repeat(MAX_INLINE_CONTENT_SIZE + 1);
        let reference = PromptContentReference::from_content(
            content,
            Path::new("/backup/prompt.md"),
            "User requirements",
        );
        assert!(!reference.is_inline());
        let rendered = reference.render_for_template();
        assert!(rendered.contains("/backup/prompt.md"));
        assert!(rendered.contains("User requirements"));
    }

    #[test]
    fn test_exactly_max_size_is_inline() {
        let content = "x".repeat(MAX_INLINE_CONTENT_SIZE);
        let reference = PromptContentReference::from_content(
            content.clone(),
            Path::new("/backup/path"),
            "test",
        );
        assert!(reference.is_inline());
        // Content at the boundary must round-trip unchanged, as in the DIFF
        // and PLAN boundary tests.
        assert_eq!(reference.render_for_template(), content);
    }

    #[test]
    fn test_empty_content_is_inline() {
        let reference =
            PromptContentReference::from_content(String::new(), Path::new("/backup"), "test");
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), "");
    }

    #[test]
    fn test_unicode_content_size_in_bytes() {
        // Unicode characters take multiple bytes
        // 🎉 is 4 bytes in UTF-8
        let emoji = "🎉".repeat(MAX_INLINE_CONTENT_SIZE / 4 + 1);
        let reference = PromptContentReference::from_content(emoji, Path::new("/backup"), "test");
        // Should exceed limit due to multi-byte characters
        assert!(!reference.is_inline());
    }

    #[test]
    fn test_prompt_inline_constructor() {
        let content = "Direct content".to_string();
        let reference = PromptContentReference::inline(content.clone());
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), content);
    }

    #[test]
    fn test_prompt_file_path_constructor() {
        let reference =
            PromptContentReference::file_path(PathBuf::from("/path/to/file.md"), "Description");
        assert!(!reference.is_inline());
        let rendered = reference.render_for_template();
        assert!(rendered.contains("/path/to/file.md"));
        assert!(rendered.contains("Description"));
    }

    // =========================================================================
    // DiffContentReference tests
    // =========================================================================

    #[test]
    fn test_small_diff_is_inline() {
        let diff = "+added line\n-removed line".to_string();
        let reference =
            DiffContentReference::from_diff(diff.clone(), "abc123", Path::new("/backup/diff.txt"));
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), diff);
    }

    #[test]
    fn test_large_diff_reads_from_file() {
        let diff = "x".repeat(MAX_INLINE_CONTENT_SIZE + 1);
        let reference =
            DiffContentReference::from_diff(diff, "abc123", Path::new("/backup/diff.txt"));
        assert!(!reference.is_inline());
        let rendered = reference.render_for_template();
        assert!(rendered.contains("/backup/diff.txt"));
        // The start commit must be passed to both fallback git commands.
        assert!(rendered.contains("- Unstaged changes: git diff abc123\n"));
        assert!(rendered.contains("- Staged changes:   git diff --cached abc123\n"));
    }

    #[test]
    fn test_diff_with_empty_start_commit_includes_git_fallback() {
        let reference = DiffContentReference::from_diff(
            "x".repeat(MAX_INLINE_CONTENT_SIZE + 1),
            "",
            Path::new("/backup/diff.txt"),
        );
        let rendered = reference.render_for_template();
        assert!(rendered.contains("/backup/diff.txt"));
        // The trailing newline pins the commands to the no-revision form, so
        // this cannot pass against the output for a non-empty start commit.
        assert!(rendered.contains("- Unstaged changes: git diff\n"));
        assert!(rendered.contains("- Staged changes:   git diff --cached\n"));
    }

    #[test]
    fn test_diff_exactly_max_size_is_inline() {
        let diff = "d".repeat(MAX_INLINE_CONTENT_SIZE);
        let reference =
            DiffContentReference::from_diff(diff.clone(), "abc", Path::new("/backup/diff.txt"));
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), diff);
    }

    // =========================================================================
    // PlanContentReference tests
    // =========================================================================

    #[test]
    fn test_small_plan_is_inline() {
        let plan = "# Plan\n\n1. Do thing".to_string();
        let reference =
            PlanContentReference::from_plan(plan.clone(), Path::new(".agent/PLAN.md"), None);
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), plan);
    }

    #[test]
    fn test_large_plan_reads_from_file() {
        let plan = "x".repeat(MAX_INLINE_CONTENT_SIZE + 1);
        let reference = PlanContentReference::from_plan(
            plan,
            Path::new(".agent/PLAN.md"),
            Some(Path::new(".agent/tmp/plan.xml")),
        );
        assert!(!reference.is_inline());
        let rendered = reference.render_for_template();
        assert!(rendered.contains(".agent/PLAN.md"));
        assert!(rendered.contains("plan.xml"));
    }

    #[test]
    fn test_plan_without_xml_fallback() {
        let reference = PlanContentReference::from_plan(
            "x".repeat(MAX_INLINE_CONTENT_SIZE + 1),
            Path::new(".agent/PLAN.md"),
            None,
        );
        let rendered = reference.render_for_template();
        assert!(rendered.contains(".agent/PLAN.md"));
        assert!(!rendered.contains("plan.xml"));
    }

    #[test]
    fn test_plan_exactly_max_size_is_inline() {
        let plan = "p".repeat(MAX_INLINE_CONTENT_SIZE);
        let reference =
            PlanContentReference::from_plan(plan.clone(), Path::new(".agent/PLAN.md"), None);
        assert!(reference.is_inline());
        assert_eq!(reference.render_for_template(), plan);
    }
}