Skip to main content

rlm_rs/core/
buffer.rs

1//! Buffer management for RLM-RS.
2//!
3//! Buffers represent text content loaded into the RLM system, typically
4//! from files or direct input. Each buffer can be chunked for processing.
5
6use crate::io::{current_timestamp, find_char_boundary};
7use serde::{Deserialize, Serialize};
8use std::path::PathBuf;
9
10/// Represents a text buffer in the RLM system.
11///
12/// Buffers are the primary unit of content storage, containing text
13/// that can be chunked and processed by the RLM workflow.
14///
15/// # Examples
16///
17/// ```
18/// use rlm_rs::core::Buffer;
19///
20/// let buffer = Buffer::from_content("Hello, world!".to_string());
21/// assert_eq!(buffer.size(), 13);
22/// ```
23#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
24pub struct Buffer {
25    /// Unique identifier (assigned by storage layer).
26    pub id: Option<i64>,
27
28    /// Optional name for the buffer.
29    pub name: Option<String>,
30
31    /// Source file path (if loaded from file).
32    pub source: Option<PathBuf>,
33
34    /// Buffer content.
35    pub content: String,
36
37    /// Buffer metadata.
38    pub metadata: BufferMetadata,
39}
40
41/// Metadata associated with a buffer.
42#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
43pub struct BufferMetadata {
44    /// Content type or file extension (e.g., "txt", "md", "json").
45    pub content_type: Option<String>,
46
47    /// Unix timestamp when buffer was created.
48    pub created_at: i64,
49
50    /// Unix timestamp when buffer was last modified.
51    pub updated_at: i64,
52
53    /// Total size in bytes.
54    pub size: usize,
55
56    /// Line count (computed on demand).
57    pub line_count: Option<usize>,
58
59    /// Number of chunks (set after chunking).
60    pub chunk_count: Option<usize>,
61
62    /// SHA-256 hash of content (for deduplication).
63    pub content_hash: Option<String>,
64}
65
66impl Buffer {
67    /// Creates a new buffer from content string.
68    ///
69    /// # Arguments
70    ///
71    /// * `content` - The text content for the buffer.
72    ///
73    /// # Examples
74    ///
75    /// ```
76    /// use rlm_rs::core::Buffer;
77    ///
78    /// let buffer = Buffer::from_content("Some text content".to_string());
79    /// assert!(buffer.id.is_none());
80    /// assert!(buffer.source.is_none());
81    /// ```
82    #[must_use]
83    pub fn from_content(content: String) -> Self {
84        let size = content.len();
85        let now = current_timestamp();
86        Self {
87            id: None,
88            name: None,
89            source: None,
90            content,
91            metadata: BufferMetadata {
92                size,
93                created_at: now,
94                updated_at: now,
95                ..Default::default()
96            },
97        }
98    }
99
100    /// Creates a new buffer from a file path and content.
101    ///
102    /// # Arguments
103    ///
104    /// * `path` - Path to the source file.
105    /// * `content` - The text content read from the file.
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// use rlm_rs::core::Buffer;
111    /// use std::path::PathBuf;
112    ///
113    /// let buffer = Buffer::from_file(
114    ///     PathBuf::from("example.txt"),
115    ///     "File content".to_string(),
116    /// );
117    /// assert!(buffer.source.is_some());
118    /// ```
119    #[must_use]
120    pub fn from_file(path: PathBuf, content: String) -> Self {
121        let size = content.len();
122        let content_type = infer_content_type(&path);
123        let name = path
124            .file_name()
125            .and_then(|n| n.to_str())
126            .map(ToString::to_string);
127        let now = current_timestamp();
128
129        Self {
130            id: None,
131            name,
132            source: Some(path),
133            content,
134            metadata: BufferMetadata {
135                content_type,
136                size,
137                created_at: now,
138                updated_at: now,
139                ..Default::default()
140            },
141        }
142    }
143
144    /// Creates a new named buffer from content.
145    ///
146    /// # Arguments
147    ///
148    /// * `name` - Name for the buffer.
149    /// * `content` - The text content for the buffer.
150    #[must_use]
151    pub fn from_named(name: String, content: String) -> Self {
152        let mut buffer = Self::from_content(content);
153        buffer.name = Some(name);
154        buffer
155    }
156
157    /// Returns the size of the buffer in bytes.
158    #[must_use]
159    pub const fn size(&self) -> usize {
160        self.content.len()
161    }
162
163    /// Returns the line count of the buffer.
164    ///
165    /// This is computed on first call and cached.
166    pub fn line_count(&mut self) -> usize {
167        if let Some(count) = self.metadata.line_count {
168            return count;
169        }
170        let count = self.content.lines().count();
171        self.metadata.line_count = Some(count);
172        count
173    }
174
175    /// Returns a slice of the buffer content.
176    ///
177    /// # Arguments
178    ///
179    /// * `start` - Start byte offset.
180    /// * `end` - End byte offset.
181    ///
182    /// # Returns
183    ///
184    /// The content slice, or `None` if offsets are invalid.
185    #[must_use]
186    pub fn slice(&self, start: usize, end: usize) -> Option<&str> {
187        if start <= end && end <= self.content.len() {
188            self.content.get(start..end)
189        } else {
190            None
191        }
192    }
193
194    /// Returns a peek of the buffer content from the beginning.
195    ///
196    /// # Arguments
197    ///
198    /// * `len` - Maximum number of bytes to return.
199    #[must_use]
200    pub fn peek(&self, len: usize) -> &str {
201        let end = len.min(self.content.len());
202        // Find valid UTF-8 boundary
203        let end = find_char_boundary(&self.content, end);
204        &self.content[..end]
205    }
206
207    /// Returns a peek of the buffer content from the end.
208    ///
209    /// # Arguments
210    ///
211    /// * `len` - Maximum number of bytes to return.
212    #[must_use]
213    pub fn peek_end(&self, len: usize) -> &str {
214        let start = self.content.len().saturating_sub(len);
215        // Find valid UTF-8 boundary
216        let start = find_char_boundary(&self.content, start);
217        &self.content[start..]
218    }
219
220    /// Checks if the buffer is empty.
221    #[must_use]
222    pub const fn is_empty(&self) -> bool {
223        self.content.is_empty()
224    }
225
226    /// Returns the display name for this buffer.
227    #[must_use]
228    pub fn display_name(&self) -> String {
229        if let Some(ref name) = self.name {
230            return name.clone();
231        }
232        if let Some(ref path) = self.source
233            && let Some(name) = path.file_name()
234            && let Some(s) = name.to_str()
235        {
236            return s.to_string();
237        }
238        if let Some(id) = self.id {
239            return format!("buffer-{id}");
240        }
241        "unnamed".to_string()
242    }
243
244    /// Sets the chunk count after chunking.
245    pub fn set_chunk_count(&mut self, count: usize) {
246        self.metadata.chunk_count = Some(count);
247        self.metadata.updated_at = current_timestamp();
248    }
249
250    /// Computes and sets the content hash.
251    pub fn compute_hash(&mut self) {
252        use std::collections::hash_map::DefaultHasher;
253        use std::hash::{Hash, Hasher};
254
255        let mut hasher = DefaultHasher::new();
256        self.content.hash(&mut hasher);
257        self.metadata.content_hash = Some(format!("{:016x}", hasher.finish()));
258    }
259}
260
/// Derives a content type from the extension of `path`.
///
/// Returns the extension in lowercase, or `None` when the path has no
/// extension or the extension is not valid UTF-8.
fn infer_content_type(path: &std::path::Path) -> Option<String> {
    let extension = path.extension()?.to_str()?;
    Some(extension.to_lowercase())
}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture text shared by the size, slice and peek tests.
    const GREETING: &str = "Hello, world!";

    /// Builds an anonymous buffer holding `text`.
    fn buffer_with(text: &str) -> Buffer {
        Buffer::from_content(text.to_owned())
    }

    #[test]
    fn test_buffer_from_content() {
        let buffer = buffer_with(GREETING);

        assert_eq!(buffer.id, None);
        assert_eq!(buffer.source, None);
        assert_eq!(buffer.size(), 13);
        assert!(!buffer.is_empty());
    }

    #[test]
    fn test_buffer_from_file() {
        let path = PathBuf::from("test.txt");
        let buffer = Buffer::from_file(path.clone(), "content".to_string());

        assert_eq!(buffer.source, Some(path));
        assert_eq!(buffer.metadata.content_type.as_deref(), Some("txt"));
        assert_eq!(buffer.name.as_deref(), Some("test.txt"));
    }

    #[test]
    fn test_buffer_from_named() {
        let buffer = Buffer::from_named("my-buffer".to_string(), "content".to_string());

        assert_eq!(buffer.name.as_deref(), Some("my-buffer"));
    }

    #[test]
    fn test_buffer_slice() {
        let buffer = buffer_with(GREETING);

        assert_eq!(buffer.slice(0, 5), Some("Hello"));
        assert_eq!(buffer.slice(7, 12), Some("world"));
        // End offset past the content.
        assert_eq!(buffer.slice(0, 100), None);
        // Start offset after end offset.
        assert_eq!(buffer.slice(10, 5), None);
    }

    #[test]
    fn test_buffer_peek() {
        let buffer = buffer_with(GREETING);

        assert_eq!(buffer.peek(5), "Hello");
        // A request longer than the content is clamped.
        assert_eq!(buffer.peek(100), GREETING);
    }

    #[test]
    fn test_buffer_peek_end() {
        let buffer = buffer_with(GREETING);

        assert_eq!(buffer.peek_end(6), "world!");
        // A request longer than the content is clamped.
        assert_eq!(buffer.peek_end(100), GREETING);
    }

    #[test]
    fn test_buffer_line_count() {
        let mut buffer = buffer_with("line1\nline2\nline3");

        // The first call computes the count, the second reads the cache.
        for _ in 0..2 {
            assert_eq!(buffer.line_count(), 3);
        }
        assert_eq!(buffer.metadata.line_count, Some(3));
    }

    #[test]
    fn test_buffer_display_name() {
        let named = Buffer::from_named("named".to_string(), String::new());
        assert_eq!(named.display_name(), "named");

        let from_path = Buffer::from_file(PathBuf::from("/path/to/file.txt"), String::new());
        assert_eq!(from_path.display_name(), "file.txt");

        let with_id = Buffer {
            id: Some(42),
            ..Buffer::from_content(String::new())
        };
        assert_eq!(with_id.display_name(), "buffer-42");

        let anonymous = Buffer::from_content(String::new());
        assert_eq!(anonymous.display_name(), "unnamed");
    }

    #[test]
    fn test_buffer_display_name_source_without_name() {
        // With no explicit name, the file name of the source path is used.
        let buffer = Buffer {
            source: Some(PathBuf::from("/some/path/to/document.md")),
            ..Buffer::from_content(String::new())
        };

        assert_eq!(buffer.display_name(), "document.md");
    }

    #[test]
    fn test_buffer_hash() {
        let mut first = buffer_with("Hello");
        first.compute_hash();
        assert!(first.metadata.content_hash.is_some());

        // Equal content must produce an equal digest.
        let mut second = buffer_with("Hello");
        second.compute_hash();
        assert_eq!(first.metadata.content_hash, second.metadata.content_hash);
    }

    #[test]
    fn test_buffer_empty() {
        let buffer = Buffer::from_content(String::new());

        assert!(buffer.is_empty());
        assert_eq!(buffer.size(), 0);
    }

    #[test]
    fn test_buffer_serialization() {
        let buffer = Buffer::from_named("test".to_string(), "content".to_string());

        let json = serde_json::to_string(&buffer).expect("buffer should serialize to JSON");
        let restored: Buffer =
            serde_json::from_str(&json).expect("serialized buffer should deserialize");

        assert_eq!(restored.content, "content");
    }
}