Skip to main content

distri_types/
filesystem.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use schemars::JsonSchema;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::collections::hash_map::DefaultHasher;
7use std::hash::{Hash, Hasher};
8
9/// Context information for file storage operations
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct FileContext {
12    /// Thread ID for organizing files
13    pub thread_id: String,
14    /// Task ID if available
15    pub task_id: Option<String>,
16    /// Tool call ID that generated this content
17    pub tool_call_id: Option<String>,
18    /// Content type/mime type hint
19    pub content_type: Option<String>,
20    /// Original filename if content represents a file
21    pub original_filename: Option<String>,
22}
23
24/// Pure filesystem metadata about a file - no artifact context
25#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
26pub struct FileMetadata {
27    /// Unique file ID
28    pub file_id: String,
29    /// Relative path from filesystem base
30    pub relative_path: String,
31    /// File size in bytes
32    pub size: u64,
33    /// Content type/mime type if known
34    pub content_type: Option<String>,
35    /// Original filename if available
36    pub original_filename: Option<String>,
37    /// When the file was created
38    #[schemars(with = "String")]
39    pub created_at: chrono::DateTime<chrono::Utc>,
40    /// When the file was last modified
41    #[schemars(with = "String")]
42    pub updated_at: chrono::DateTime<chrono::Utc>,
43    /// File checksum for integrity verification
44    pub checksum: Option<String>,
45    /// Rich statistics about the file content
46    pub stats: Option<FileStats>,
47    /// Short preview of the content for context
48    pub preview: Option<String>,
49}
50
51/// Artifact metadata that combines filesystem metadata with context information
52#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
53pub struct Artifact {
54    /// Filesystem metadata
55    pub file_metadata: FileMetadata,
56    /// Associated thread ID
57    pub thread_id: String,
58    /// Associated task ID if available
59    pub task_id: Option<String>,
60    /// Tool call ID that generated this artifact
61    pub tool_call_id: Option<String>,
62}
63
64impl FileMetadata {
65    /// Get the full display name for this file
66    pub fn display_name(&self) -> String {
67        self.original_filename
68            .clone()
69            .unwrap_or_else(|| format!("file_{}", &self.file_id[..8]))
70    }
71
72    /// Get a human readable size string
73    pub fn size_display(&self) -> String {
74        let size = self.size as f64;
75        if size < 1024.0 {
76            format!("{}B", self.size)
77        } else if size < 1024.0 * 1024.0 {
78            format!("{:.1}KB", size / 1024.0)
79        } else if size < 1024.0 * 1024.0 * 1024.0 {
80            format!("{:.1}MB", size / (1024.0 * 1024.0))
81        } else {
82            format!("{:.1}GB", size / (1024.0 * 1024.0 * 1024.0))
83        }
84    }
85
86    /// Check if this appears to be a text file
87    pub fn is_text_file(&self) -> bool {
88        self.content_type
89            .as_ref()
90            .map(|ct| ct.starts_with("text/") || ct.contains("json") || ct.contains("xml"))
91            .unwrap_or(false)
92    }
93
94    /// Get a summary line for display
95    pub fn summary(&self) -> String {
96        format!(
97            "{} ({}{})",
98            self.display_name(),
99            self.size_display(),
100            if let Some(ct) = &self.content_type {
101                format!(", {}", ct)
102            } else {
103                String::new()
104            }
105        )
106    }
107}
108
109impl Artifact {
110    /// Create a new artifact with file metadata and context
111    pub fn new(
112        file_metadata: FileMetadata,
113        thread_id: String,
114        task_id: Option<String>,
115        tool_call_id: Option<String>,
116    ) -> Self {
117        Self {
118            file_metadata,
119            thread_id,
120            task_id,
121            tool_call_id,
122        }
123    }
124
125    /// Get the artifact path in the namespaced format
126    pub fn artifact_path(&self) -> String {
127        if let Some(task_id) = &self.task_id {
128            format!(
129                "{}/artifact/{}/{}",
130                self.thread_id, task_id, self.file_metadata.file_id
131            )
132        } else {
133            format!("{}/artifact/{}", self.thread_id, self.file_metadata.file_id)
134        }
135    }
136
137    /// Delegate display methods to file metadata
138    pub fn display_name(&self) -> String {
139        self.file_metadata.display_name()
140    }
141
142    pub fn size_display(&self) -> String {
143        self.file_metadata.size_display()
144    }
145
146    pub fn summary(&self) -> String {
147        self.file_metadata.summary()
148    }
149}
150
151/// Artifact namespace for organizing artifacts by thread and task
152/// Handles path creation logic consistently across the codebase
153#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash)]
154pub struct ArtifactNamespace {
155    /// Thread ID (required)
156    pub thread_id: String,
157    /// Task ID (optional - if None, uses thread-level namespace)
158    pub task_id: Option<String>,
159}
160
161impl ArtifactNamespace {
162    /// Create a new namespace from thread_id and optional task_id
163    pub fn new(thread_id: String, task_id: Option<String>) -> Self {
164        Self { thread_id, task_id }
165    }
166
167    /// Convert ID to short hex (8 chars like git commits)
168    fn short_hex(id: &str) -> String {
169        let mut hasher = DefaultHasher::new();
170        id.hash(&mut hasher);
171        format!("{:08x}", hasher.finish())
172    }
173
174    /// Get the thread-level namespace path: `threads/{short_thread}`
175    pub fn thread_path(&self) -> String {
176        let short_thread = Self::short_hex(&self.thread_id);
177        format!("threads/{}", short_thread)
178    }
179
180    /// Get the task-level namespace path: `threads/{short_thread}/tasks/{short_task}`
181    /// Returns None if task_id is not set
182    pub fn task_path(&self) -> Option<String> {
183        self.task_id.as_ref().map(|task_id| {
184            let short_thread = Self::short_hex(&self.thread_id);
185            let short_task = Self::short_hex(task_id);
186            format!("threads/{}/tasks/{}", short_thread, short_task)
187        })
188    }
189
190    /// Get the primary namespace path (task-level if available, otherwise thread-level)
191    /// This is the path where artifacts should be saved
192    pub fn primary_path(&self) -> String {
193        self.task_path().unwrap_or_else(|| self.thread_path())
194    }
195
196    /// Get all paths that should be checked when listing artifacts
197    /// Returns both thread-level and task-level paths (if task_id is set)
198    /// This ensures list_artifacts can find artifacts saved at either level
199    pub fn all_paths(&self) -> Vec<String> {
200        let mut paths = vec![self.thread_path()];
201        if let Some(task_path) = self.task_path() {
202            paths.push(task_path);
203        }
204        paths
205    }
206
207    /// Parse a namespace path back into thread_id and task_id
208    /// Handles both `threads/{hash}` and `threads/{hash}/tasks/{hash}` formats
209    /// Note: This cannot reverse the hash to get the original UUIDs, so it returns None
210    /// In practice, you should store the mapping or use the namespace directly
211    pub fn from_path(_path: &str) -> Option<Self> {
212        // We can't reverse the hash, so we return None
213        // In practice, we'd need to store the mapping or use the namespace directly
214        None
215    }
216}
217
218/// Type-specific file statistics that provide rich metadata about file content
219#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
220#[serde(tag = "type", rename_all = "snake_case")]
221pub enum FileStats {
222    Json(JsonStats),
223    Markdown(MarkdownStats),
224    Text(TextStats),
225}
226
227impl FileStats {
228    /// Get the type of file stats as a string
229    pub fn stats_type(&self) -> &'static str {
230        match self {
231            FileStats::Json(_) => "json",
232            FileStats::Markdown(_) => "markdown",
233            FileStats::Text(_) => "text",
234        }
235    }
236
237    /// Extract a human-readable summary of the file stats
238    pub fn summary(&self) -> String {
239        match self {
240            FileStats::Json(stats) => stats.summary(),
241            FileStats::Markdown(stats) => stats.summary(),
242            FileStats::Text(stats) => stats.summary(),
243        }
244    }
245
246    /// Get context information suitable for agent prompts
247    pub fn context_info(&self) -> String {
248        match self {
249            FileStats::Json(stats) => stats.context_info(),
250            FileStats::Markdown(stats) => stats.context_info(),
251            FileStats::Text(stats) => stats.context_info(),
252        }
253    }
254}
255
256/// Statistics for JSON files
257#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
258pub struct JsonStats {
259    /// Whether the root is an array
260    pub is_array: bool,
261    /// Length if it's an array
262    pub array_length: Option<usize>,
263    /// Top-level keys (for objects) or sample element keys (for arrays)
264    pub top_level_keys: Vec<String>,
265    /// Maximum nesting depth
266    pub nested_depth: usize,
267    /// Sample of unique values for interesting fields (max 5 values each)
268    pub unique_values_sample: HashMap<String, Vec<String>>,
269    /// Estimated cardinality for fields with many unique values
270    pub cardinality_estimates: HashMap<String, usize>,
271    /// Preview of first few elements/properties (as JSON string)
272    pub preview: String,
273}
274
275impl JsonStats {
276    pub fn summary(&self) -> String {
277        if self.is_array {
278            format!(
279                "JSON array with {} elements, {} unique keys, depth {}",
280                self.array_length.unwrap_or(0),
281                self.top_level_keys.len(),
282                self.nested_depth
283            )
284        } else {
285            format!(
286                "JSON object with {} keys, depth {}",
287                self.top_level_keys.len(),
288                self.nested_depth
289            )
290        }
291    }
292
293    pub fn context_info(&self) -> String {
294        let mut info = self.summary();
295
296        if !self.top_level_keys.is_empty() {
297            info.push_str(&format!("\nKeys: {}", self.top_level_keys.join(", ")));
298        }
299
300        // Highlight high-cardinality fields
301        let high_card_fields: Vec<_> = self
302            .cardinality_estimates
303            .iter()
304            .filter(|&(_, &count)| count > 50)
305            .map(|(field, count)| format!("{} (~{})", field, count))
306            .collect();
307
308        if !high_card_fields.is_empty() {
309            info.push_str(&format!(
310                "\nHigh-cardinality fields: {}",
311                high_card_fields.join(", ")
312            ));
313        }
314
315        // Show sample values for categorical fields
316        for (field, values) in &self.unique_values_sample {
317            if values.len() <= 10 {
318                // Only show for low-cardinality categorical fields
319                info.push_str(&format!("\n{}: {}", field, values.join(", ")));
320            }
321        }
322
323        info
324    }
325}
326
327/// Statistics for Markdown files
328#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
329pub struct MarkdownStats {
330    /// Word count
331    pub word_count: usize,
332    /// Headings with their text and level (1-6)
333    pub headings: Vec<HeadingInfo>,
334    /// Number of code blocks
335    pub code_blocks: usize,
336    /// Number of links
337    pub links: usize,
338    /// Number of images
339    pub images: usize,
340    /// Number of tables
341    pub tables: usize,
342    /// Number of lists
343    pub lists: usize,
344    /// YAML/TOML frontmatter type if present
345    pub front_matter: Option<String>,
346    /// Preview of first few lines
347    pub preview: String,
348}
349
350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
351pub struct HeadingInfo {
352    pub text: String,
353    pub level: usize,
354}
355
356impl MarkdownStats {
357    pub fn summary(&self) -> String {
358        format!(
359            "Markdown: {} words, {} headings, {} code blocks, {} tables",
360            self.word_count,
361            self.headings.len(),
362            self.code_blocks,
363            self.tables
364        )
365    }
366
367    pub fn context_info(&self) -> String {
368        let mut info = self.summary();
369
370        if !self.headings.is_empty() {
371            info.push_str("\nStructure:");
372            for heading in &self.headings[..5.min(self.headings.len())] {
373                let indent = "  ".repeat(heading.level.saturating_sub(1));
374                info.push_str(&format!("\n{}{}", indent, heading.text));
375            }
376        }
377
378        if let Some(fm_type) = &self.front_matter {
379            info.push_str(&format!("\nFrontmatter: {}", fm_type));
380        }
381
382        info
383    }
384}
385
386/// Statistics for plain text files
387#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
388pub struct TextStats {
389    /// Number of lines
390    pub lines: usize,
391    /// Number of words
392    pub words: usize,
393    /// Number of characters
394    pub characters: usize,
395    /// Detected encoding
396    pub encoding: String,
397    /// Detected language (if any)
398    pub language: Option<String>,
399    /// Hints about the text structure
400    pub structure_hints: TextStructure,
401    /// Preview of first few lines
402    pub preview: String,
403}
404
405/// Detected structure patterns in text files
406#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
407#[serde(tag = "type", rename_all = "snake_case")]
408pub enum TextStructure {
409    LogFile {
410        log_level_counts: HashMap<String, usize>,
411    },
412    ConfigFile {
413        format: String,
414    },
415    CodeFile {
416        language: String,
417        function_count: usize,
418    },
419    PlainText,
420}
421
422impl TextStats {
423    pub fn summary(&self) -> String {
424        format!(
425            "Text: {} lines, {} words ({} chars)",
426            self.lines, self.words, self.characters
427        )
428    }
429
430    pub fn context_info(&self) -> String {
431        let mut info = self.summary();
432
433        if let Some(lang) = &self.language {
434            info.push_str(&format!("\nLanguage: {}", lang));
435        }
436
437        match &self.structure_hints {
438            TextStructure::LogFile { log_level_counts } => {
439                info.push_str("\nStructure: Log file");
440                let levels: Vec<_> = log_level_counts
441                    .iter()
442                    .map(|(level, count)| format!("{}: {}", level, count))
443                    .collect();
444                if !levels.is_empty() {
445                    info.push_str(&format!("\nLevels: {}", levels.join(", ")));
446                }
447            }
448            TextStructure::ConfigFile { format } => {
449                info.push_str(&format!("\nStructure: Config file ({})", format));
450            }
451            TextStructure::CodeFile {
452                language,
453                function_count,
454            } => {
455                info.push_str(&format!(
456                    "\nStructure: Code file ({}, {} functions)",
457                    language, function_count
458                ));
459            }
460            TextStructure::PlainText => {
461                info.push_str("\nStructure: Plain text");
462            }
463        }
464
465        info
466    }
467}
468
469/// Parameters for reading files with optional line ranges
470#[derive(Debug, Clone, Serialize, Deserialize, Default)]
471pub struct ReadParams {
472    pub start_line: Option<u64>,
473    pub end_line: Option<u64>,
474}
475
476/// File read result with content and metadata
477#[derive(Debug, Clone, Serialize, Deserialize)]
478pub struct FileReadResult {
479    pub content: String,
480    pub start_line: u64,
481    pub end_line: u64,
482    pub total_lines: u64,
483}
484
485/// Directory listing result
486#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct DirectoryListing {
488    pub path: String,
489    pub entries: Vec<DirectoryEntry>,
490}
491
492/// Directory entry information
493#[derive(Debug, Clone, Serialize, Deserialize)]
494pub struct DirectoryEntry {
495    pub name: String,
496    pub is_file: bool,
497    pub is_dir: bool,
498    pub size: Option<u64>,
499}
500
501/// Search result containing matches
502#[derive(Debug, Clone, Serialize, Deserialize)]
503pub struct SearchResult {
504    pub path: String,
505    pub matches: Vec<SearchMatch>,
506}
507
508/// Individual search match
509#[derive(Debug, Clone, Serialize, Deserialize)]
510pub struct SearchMatch {
511    pub file_path: String,
512    pub line_number: Option<u64>,
513    pub line_content: String,
514    pub match_text: String,
515}
516
517/// Legacy filesystem operations interface for tool compatibility
518#[async_trait]
519pub trait FileSystemOps: Send + Sync + std::fmt::Debug {
520    /// Read file with optional line range (includes line numbers)
521    async fn read(&self, path: &str, params: ReadParams) -> Result<FileReadResult>;
522
523    /// Read file as raw string without line numbers
524    async fn read_raw(&self, path: &str) -> Result<String> {
525        // Default implementation: use read() and strip line numbers
526        let result = self.read(path, ReadParams::default()).await?;
527        // Strip line number prefixes if present
528        if result.content.contains("→") {
529            Ok(result
530                .content
531                .lines()
532                .map(|line| {
533                    if let Some(pos) = line.find("→") {
534                        &line[pos + 1..]
535                    } else {
536                        line
537                    }
538                })
539                .collect::<Vec<_>>()
540                .join("\n"))
541        } else {
542            Ok(result.content)
543        }
544    }
545
546    /// Read file with line numbers and optional line range
547    async fn read_with_line_numbers(
548        &self,
549        path: &str,
550        params: ReadParams,
551    ) -> Result<FileReadResult> {
552        self.read(path, params).await
553    }
554
555    /// Write content to file
556    async fn write(&self, path: &str, content: &str) -> Result<()>;
557
558    /// List directory contents with metadata
559    async fn list(&self, path: &str) -> Result<DirectoryListing>;
560
561    /// Delete file or directory
562    async fn delete(&self, path: &str, recursive: bool) -> Result<()>;
563
564    /// Search files and content
565    async fn search(
566        &self,
567        path: &str,
568        content_pattern: Option<&str>,
569        file_pattern: Option<&str>,
570    ) -> Result<SearchResult>;
571
572    /// Copy file (legacy - should use shell commands)
573    async fn copy(&self, from: &str, to: &str) -> Result<()>;
574
575    /// Move file (legacy - should use shell commands)
576    async fn move_file(&self, from: &str, to: &str) -> Result<()>;
577
578    /// Create directory (legacy - directories created automatically)
579    async fn mkdir(&self, path: &str) -> Result<()>;
580
581    /// Get file metadata
582    async fn info(&self, path: &str) -> Result<FileMetadata>;
583
584    /// List directory tree (legacy - same as list)
585    async fn tree(&self, path: &str) -> Result<DirectoryListing>;
586}