// engram/intelligence/content_utils.rs

//! Content utilities for memory display and manipulation.
//!
//! Provides:
//! - **Soft trim**: Smart truncation that keeps the head (60% by default) and the
//!   tail (30% by default) of the content, joined by an ellipsis
//! - **Compact preview**: Short single-line preview for list views
//! - **Content statistics**: Character/word/line/sentence/paragraph counts

use serde::{Deserialize, Serialize};

10/// Configuration for soft trim operation
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct SoftTrimConfig {
13    /// Maximum total characters (default: 500)
14    pub max_chars: usize,
15    /// Percentage of max_chars for head portion (default: 60)
16    pub head_percent: usize,
17    /// Percentage of max_chars for tail portion (default: 30)
18    pub tail_percent: usize,
19    /// Ellipsis string to use (default: "\n...\n")
20    pub ellipsis: String,
21    /// Preserve word boundaries (default: true)
22    pub preserve_words: bool,
23}
24
25impl Default for SoftTrimConfig {
26    fn default() -> Self {
27        Self {
28            max_chars: 500,
29            head_percent: 60,
30            tail_percent: 30,
31            ellipsis: "\n...\n".to_string(),
32            preserve_words: true,
33        }
34    }
35}
36
37/// Result of soft trim operation
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct SoftTrimResult {
40    /// The trimmed content
41    pub content: String,
42    /// Whether the content was actually trimmed
43    pub was_trimmed: bool,
44    /// Original character count
45    pub original_chars: usize,
46    /// Trimmed character count
47    pub trimmed_chars: usize,
48    /// Characters removed
49    pub chars_removed: usize,
50}
51
52/// Compact memory representation for list views
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct CompactMemory {
55    /// Memory ID
56    pub id: i64,
57    /// First line or preview of content (max 100 chars)
58    pub preview: String,
59    /// Memory type
60    pub memory_type: String,
61    /// Tags
62    pub tags: Vec<String>,
63    /// Importance score
64    pub importance: Option<f32>,
65    /// Created timestamp
66    pub created_at: String,
67    /// Updated timestamp
68    pub updated_at: String,
69    /// Workspace
70    pub workspace: String,
71    /// Tier (permanent/daily)
72    pub tier: String,
73    /// Full content character count
74    pub content_length: usize,
75    /// Whether content was truncated for preview
76    pub is_truncated: bool,
77}
78
79/// Content statistics
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct ContentStats {
82    /// Character count
83    pub chars: usize,
84    /// Word count (whitespace-separated)
85    pub words: usize,
86    /// Line count
87    pub lines: usize,
88    /// Sentence count (approximate)
89    pub sentences: usize,
90    /// Paragraph count (double newline separated)
91    pub paragraphs: usize,
92}
93
94/// Perform soft trim on content
95///
96/// Preserves the beginning (head) and end (tail) of content while
97/// removing the middle portion if content exceeds max_chars.
98///
99/// Default split: 60% head, 30% tail, 10% for ellipsis overhead
100///
101/// # Example
102///
103/// ```
104/// use engram::intelligence::content_utils::{soft_trim, SoftTrimConfig};
105///
106/// let long_content = "A".repeat(1000);
107/// let result = soft_trim(&long_content, &SoftTrimConfig::default());
108/// assert!(result.was_trimmed);
109/// assert!(result.content.len() < 1000);
110/// assert!(result.content.contains("..."));
111/// ```
112pub fn soft_trim(content: &str, config: &SoftTrimConfig) -> SoftTrimResult {
113    let original_chars = content.chars().count();
114
115    // If content fits, return as-is
116    if original_chars <= config.max_chars {
117        return SoftTrimResult {
118            content: content.to_string(),
119            was_trimmed: false,
120            original_chars,
121            trimmed_chars: original_chars,
122            chars_removed: 0,
123        };
124    }
125
126    // Calculate head and tail sizes (in characters, not bytes)
127    let ellipsis_char_len = config.ellipsis.chars().count();
128    let available = config.max_chars.saturating_sub(ellipsis_char_len);
129    let head_char_count = (available * config.head_percent) / 100;
130    let tail_char_count = (available * config.tail_percent) / 100;
131
132    // Convert character count to byte index for head
133    let head_byte_end: usize = content
134        .char_indices()
135        .take(head_char_count)
136        .last()
137        .map(|(i, c)| i + c.len_utf8())
138        .unwrap_or(0);
139
140    let mut head_end = head_byte_end;
141    if config.preserve_words && head_end < content.len() {
142        // Find last space before head_end
143        if let Some(last_space) = content[..head_end].rfind(|c: char| c.is_whitespace()) {
144            if last_space > head_end / 2 {
145                head_end = last_space;
146            }
147        }
148    }
149
150    // Convert character count from end to byte index for tail
151    let total_chars = original_chars;
152    let tail_start_char = total_chars.saturating_sub(tail_char_count);
153    let tail_byte_start: usize = content
154        .char_indices()
155        .nth(tail_start_char)
156        .map(|(i, _)| i)
157        .unwrap_or(content.len());
158
159    let mut tail_start = tail_byte_start;
160    if config.preserve_words && tail_start > 0 && tail_start < content.len() {
161        // Find first space after tail_start
162        if let Some(first_space) = content[tail_start..].find(|c: char| c.is_whitespace()) {
163            let new_start = tail_start + first_space + 1;
164            if new_start < content.len() {
165                tail_start = new_start;
166            }
167        }
168    }
169
170    // Ensure head and tail don't overlap
171    if head_end >= tail_start {
172        // Content is borderline - just truncate end
173        let truncate_byte_end: usize = content
174            .char_indices()
175            .take(config.max_chars)
176            .last()
177            .map(|(i, c)| i + c.len_utf8())
178            .unwrap_or(content.len());
179        let truncated = &content[..truncate_byte_end.min(content.len())];
180        let trimmed_chars = truncated.chars().count() + ellipsis_char_len;
181        return SoftTrimResult {
182            content: format!("{}{}", truncated.trim_end(), config.ellipsis.trim()),
183            was_trimmed: true,
184            original_chars,
185            trimmed_chars,
186            chars_removed: original_chars - truncated.chars().count(),
187        };
188    }
189
190    let head = content[..head_end].trim_end();
191    let tail = content[tail_start..].trim_start();
192    let trimmed = format!("{}{}{}", head, config.ellipsis, tail);
193
194    SoftTrimResult {
195        content: trimmed.clone(),
196        was_trimmed: true,
197        original_chars,
198        trimmed_chars: trimmed.chars().count(),
199        chars_removed: original_chars - head.chars().count() - tail.chars().count(),
200    }
201}
202
/// Generate a compact preview of content
///
/// Works on the first line of the whitespace-trimmed `content` and returns
/// `(preview, is_truncated)`:
///
/// - Empty (or whitespace-only) content yields `("", false)`.
/// - A first line of at most `max_chars` characters is returned as-is;
///   `is_truncated` is `true` only when more content follows that line.
/// - A longer first line is cut after `max_chars` characters, moved back to
///   the last space if that space lies in the second half of the cut, and
///   suffixed with `"..."`. The preview can therefore be up to
///   `max_chars + 3` characters long.
///
/// Lengths are counted in characters, so multi-byte text is never split
/// inside a character.
pub fn compact_preview(content: &str, max_chars: usize) -> (String, bool) {
    let content = content.trim();

    // Trimmed content has a first line exactly when it is non-empty.
    let first_line = match content.lines().next() {
        Some(line) => line,
        None => return (String::new(), false),
    };

    // Byte offset where character number `max_chars` starts; `None` means
    // the whole line fits within the limit.
    let limit = match first_line.char_indices().nth(max_chars) {
        Some((offset, _)) => offset,
        None => {
            let more_follows = content.len() > first_line.len();
            return (first_line.to_string(), more_follows);
        }
    };

    // Prefer cutting at a space, but only if that keeps more than half.
    let cut = match first_line[..limit].rfind(' ') {
        Some(space) if space > limit / 2 => space,
        _ => limit,
    };

    let mut preview = first_line[..cut].trim_end().to_string();
    preview.push_str("...");
    (preview, true)
}

243/// Calculate content statistics
244pub fn content_stats(content: &str) -> ContentStats {
245    let chars = content.chars().count(); // Use actual character count, not byte length
246    let words = content.split_whitespace().count();
247    let lines = content.lines().count().max(1);
248
249    // Approximate sentence count (ends with . ! ?)
250    let sentences = content
251        .chars()
252        .filter(|c| *c == '.' || *c == '!' || *c == '?')
253        .count()
254        .max(1);
255
256    // Paragraph count (separated by blank lines)
257    let paragraphs = content
258        .split("\n\n")
259        .filter(|p| !p.trim().is_empty())
260        .count()
261        .max(1);
262
263    ContentStats {
264        chars,
265        words,
266        lines,
267        sentences,
268        paragraphs,
269    }
270}
271
// Unit tests for soft trimming, compact previews and content statistics.
#[cfg(test)]
mod tests {
    use super::*;

    // Content within the limit must come back unchanged.
    #[test]
    fn test_soft_trim_short_content() {
        let content = "Short content";
        let result = soft_trim(content, &SoftTrimConfig::default());

        assert!(!result.was_trimmed);
        assert_eq!(result.content, content);
        assert_eq!(result.chars_removed, 0);
    }

    // Over-long content is cut down to the limit and marked with an ellipsis.
    #[test]
    fn test_soft_trim_long_content() {
        let content = "A".repeat(1000);
        let config = SoftTrimConfig {
            max_chars: 100,
            ..Default::default()
        };
        let result = soft_trim(&content, &config);

        assert!(result.was_trimmed);
        assert!(result.content.len() <= 100);
        assert!(result.content.contains("..."));
        assert!(result.chars_removed > 0);
    }

    // Both ends of the content survive; only the middle is dropped.
    #[test]
    fn test_soft_trim_preserves_head_and_tail() {
        let content = format!(
            "HEADER: Important beginning content. {} FOOTER: Critical ending info.",
            "Middle content that can be removed. ".repeat(50)
        );
        let config = SoftTrimConfig {
            max_chars: 200,
            ..Default::default()
        };
        let result = soft_trim(&content, &config);

        assert!(result.was_trimmed);
        assert!(result.content.starts_with("HEADER"));
        assert!(result.content.ends_with("info."));
    }

    #[test]
    fn test_soft_trim_word_boundaries() {
        let content = "The quick brown fox jumps over the lazy dog. ".repeat(20);
        let config = SoftTrimConfig {
            max_chars: 100,
            preserve_words: true,
            ..Default::default()
        };
        let result = soft_trim(&content, &config);

        // Should not break in middle of a word
        assert!(!result.content.ends_with("Th"));
        assert!(!result.content.ends_with("fo"));
    }

    #[test]
    fn test_compact_preview_short() {
        let content = "Short content";
        let (preview, truncated) = compact_preview(content, 100);

        assert_eq!(preview, "Short content");
        assert!(!truncated);
    }

    #[test]
    fn test_compact_preview_long() {
        let content = "This is a very long first line that exceeds the maximum character limit for preview display";
        let (preview, truncated) = compact_preview(content, 30);

        assert!(preview.len() <= 33); // 30 + "..."
        assert!(preview.ends_with("..."));
        assert!(truncated);
    }

    #[test]
    fn test_compact_preview_multiline() {
        let content = "First line only\nSecond line ignored\nThird line also";
        let (preview, truncated) = compact_preview(content, 100);

        assert_eq!(preview, "First line only");
        assert!(truncated); // More content exists
    }

    #[test]
    fn test_content_stats() {
        let content = "Hello world. This is a test! How are you?\n\nSecond paragraph here.";
        let stats = content_stats(content);

        // Words: Hello, world, This, is, a, test, How, are, you, Second, paragraph, here = 12
        assert_eq!(stats.words, 12);
        assert_eq!(stats.lines, 3);
        // Sentences: "world." + "test!" + "you?" + "here." = 4 sentence endings
        assert_eq!(stats.sentences, 4);
        assert_eq!(stats.paragraphs, 2);
    }

    #[test]
    fn test_content_stats_empty() {
        let stats = content_stats("");

        assert_eq!(stats.chars, 0);
        assert_eq!(stats.words, 0);
        assert_eq!(stats.lines, 1); // min 1
        assert_eq!(stats.sentences, 1); // min 1
        assert_eq!(stats.paragraphs, 1); // min 1
    }

    #[test]
    fn test_soft_trim_unicode() {
        let content = "你好世界!这是一个很长的中文字符串。".repeat(50);
        let config = SoftTrimConfig {
            max_chars: 100,
            ..Default::default()
        };
        let result = soft_trim(&content, &config);

        // Should not panic on unicode
        assert!(result.was_trimmed);
        // Limits are counted in characters, not bytes
        assert!(result.content.chars().count() <= 100);
        // The reported length must match the content that was produced
        assert_eq!(result.trimmed_chars, result.content.chars().count());
    }

    #[test]
    fn test_compact_preview_empty() {
        let (preview, truncated) = compact_preview("", 100);
        assert!(preview.is_empty());
        assert!(!truncated);
    }

    #[test]
    fn test_compact_preview_whitespace_only() {
        let (preview, truncated) = compact_preview("   \n  \n  ", 100);
        assert!(preview.is_empty());
        assert!(!truncated);
    }
}