distri_types/
filesystem.rs

1use anyhow::Result;
2use async_trait::async_trait;
3use schemars::JsonSchema;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::collections::hash_map::DefaultHasher;
7use std::hash::{Hash, Hasher};
8
9/// Context information for file storage operations
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct FileContext {
12    /// Thread ID for organizing files
13    pub thread_id: String,
14    /// Task ID if available
15    pub task_id: Option<String>,
16    /// Tool call ID that generated this content
17    pub tool_call_id: Option<String>,
18    /// Content type/mime type hint
19    pub content_type: Option<String>,
20    /// Original filename if content represents a file
21    pub original_filename: Option<String>,
22}
23
24/// Pure filesystem metadata about a file - no artifact context
25#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
26pub struct FileMetadata {
27    /// Unique file ID
28    pub file_id: String,
29    /// Relative path from filesystem base
30    pub relative_path: String,
31    /// File size in bytes
32    pub size: u64,
33    /// Content type/mime type if known
34    pub content_type: Option<String>,
35    /// Original filename if available
36    pub original_filename: Option<String>,
37    /// When the file was created
38    #[schemars(with = "String")]
39    pub created_at: chrono::DateTime<chrono::Utc>,
40    /// When the file was last modified
41    #[schemars(with = "String")]
42    pub updated_at: chrono::DateTime<chrono::Utc>,
43    /// File checksum for integrity verification
44    pub checksum: Option<String>,
45    /// Rich statistics about the file content
46    pub stats: Option<FileStats>,
47    /// Short preview of the content for context
48    pub preview: Option<String>,
49}
50
51/// Artifact metadata that combines filesystem metadata with context information
52#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
53pub struct Artifact {
54    /// Filesystem metadata
55    pub file_metadata: FileMetadata,
56    /// Associated thread ID
57    pub thread_id: String,
58    /// Associated task ID if available
59    pub task_id: Option<String>,
60    /// Tool call ID that generated this artifact
61    pub tool_call_id: Option<String>,
62}
63
64impl FileMetadata {
65    /// Get the full display name for this file
66    pub fn display_name(&self) -> String {
67        self.original_filename
68            .clone()
69            .unwrap_or_else(|| format!("file_{}", &self.file_id[..8]))
70    }
71
72    /// Get a human readable size string
73    pub fn size_display(&self) -> String {
74        let size = self.size as f64;
75        if size < 1024.0 {
76            format!("{}B", self.size)
77        } else if size < 1024.0 * 1024.0 {
78            format!("{:.1}KB", size / 1024.0)
79        } else if size < 1024.0 * 1024.0 * 1024.0 {
80            format!("{:.1}MB", size / (1024.0 * 1024.0))
81        } else {
82            format!("{:.1}GB", size / (1024.0 * 1024.0 * 1024.0))
83        }
84    }
85
86    /// Check if this appears to be a text file
87    pub fn is_text_file(&self) -> bool {
88        self.content_type
89            .as_ref()
90            .map(|ct| ct.starts_with("text/") || ct.contains("json") || ct.contains("xml"))
91            .unwrap_or(false)
92    }
93
94    /// Get a summary line for display
95    pub fn summary(&self) -> String {
96        format!(
97            "{} ({}{})",
98            self.display_name(),
99            self.size_display(),
100            if let Some(ct) = &self.content_type {
101                format!(", {}", ct)
102            } else {
103                String::new()
104            }
105        )
106    }
107}
108
109impl Artifact {
110    /// Create a new artifact with file metadata and context
111    pub fn new(
112        file_metadata: FileMetadata,
113        thread_id: String,
114        task_id: Option<String>,
115        tool_call_id: Option<String>,
116    ) -> Self {
117        Self {
118            file_metadata,
119            thread_id,
120            task_id,
121            tool_call_id,
122        }
123    }
124
125    /// Get the artifact path in the namespaced format
126    pub fn artifact_path(&self) -> String {
127        if let Some(task_id) = &self.task_id {
128            format!(
129                "{}/artifact/{}/{}",
130                self.thread_id, task_id, self.file_metadata.file_id
131            )
132        } else {
133            format!("{}/artifact/{}", self.thread_id, self.file_metadata.file_id)
134        }
135    }
136
137    /// Delegate display methods to file metadata
138    pub fn display_name(&self) -> String {
139        self.file_metadata.display_name()
140    }
141
142    pub fn size_display(&self) -> String {
143        self.file_metadata.size_display()
144    }
145
146    pub fn summary(&self) -> String {
147        self.file_metadata.summary()
148    }
149}
150
151/// Artifact namespace for organizing artifacts by thread and task
152/// Handles path creation logic consistently across the codebase
153#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash)]
154pub struct ArtifactNamespace {
155    /// Thread ID (required)
156    pub thread_id: String,
157    /// Task ID (optional - if None, uses thread-level namespace)
158    pub task_id: Option<String>,
159}
160
161impl ArtifactNamespace {
162    /// Create a new namespace from thread_id and optional task_id
163    pub fn new(thread_id: String, task_id: Option<String>) -> Self {
164        Self {
165            thread_id,
166            task_id,
167        }
168    }
169
170    /// Convert ID to short hex (8 chars like git commits)
171    fn short_hex(id: &str) -> String {
172        let mut hasher = DefaultHasher::new();
173        id.hash(&mut hasher);
174        format!("{:08x}", hasher.finish())
175    }
176
177    /// Get the thread-level namespace path: `threads/{short_thread}`
178    pub fn thread_path(&self) -> String {
179        let short_thread = Self::short_hex(&self.thread_id);
180        format!("threads/{}", short_thread)
181    }
182
183    /// Get the task-level namespace path: `threads/{short_thread}/tasks/{short_task}`
184    /// Returns None if task_id is not set
185    pub fn task_path(&self) -> Option<String> {
186        self.task_id.as_ref().map(|task_id| {
187            let short_thread = Self::short_hex(&self.thread_id);
188            let short_task = Self::short_hex(task_id);
189            format!("threads/{}/tasks/{}", short_thread, short_task)
190        })
191    }
192
193    /// Get the primary namespace path (task-level if available, otherwise thread-level)
194    /// This is the path where artifacts should be saved
195    pub fn primary_path(&self) -> String {
196        self.task_path().unwrap_or_else(|| self.thread_path())
197    }
198
199    /// Get all paths that should be checked when listing artifacts
200    /// Returns both thread-level and task-level paths (if task_id is set)
201    /// This ensures list_artifacts can find artifacts saved at either level
202    pub fn all_paths(&self) -> Vec<String> {
203        let mut paths = vec![self.thread_path()];
204        if let Some(task_path) = self.task_path() {
205            paths.push(task_path);
206        }
207        paths
208    }
209
210    /// Parse a namespace path back into thread_id and task_id
211    /// Handles both `threads/{hash}` and `threads/{hash}/tasks/{hash}` formats
212    /// Note: This cannot reverse the hash to get the original UUIDs, so it returns None
213    /// In practice, you should store the mapping or use the namespace directly
214    pub fn from_path(_path: &str) -> Option<Self> {
215        // We can't reverse the hash, so we return None
216        // In practice, we'd need to store the mapping or use the namespace directly
217        None
218    }
219}
220
221/// Type-specific file statistics that provide rich metadata about file content
222#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
223#[serde(tag = "type", rename_all = "snake_case")]
224pub enum FileStats {
225    Json(JsonStats),
226    Markdown(MarkdownStats),
227    Text(TextStats),
228}
229
230impl FileStats {
231    /// Get the type of file stats as a string
232    pub fn stats_type(&self) -> &'static str {
233        match self {
234            FileStats::Json(_) => "json",
235            FileStats::Markdown(_) => "markdown",
236            FileStats::Text(_) => "text",
237        }
238    }
239
240    /// Extract a human-readable summary of the file stats
241    pub fn summary(&self) -> String {
242        match self {
243            FileStats::Json(stats) => stats.summary(),
244            FileStats::Markdown(stats) => stats.summary(),
245            FileStats::Text(stats) => stats.summary(),
246        }
247    }
248
249    /// Get context information suitable for agent prompts
250    pub fn context_info(&self) -> String {
251        match self {
252            FileStats::Json(stats) => stats.context_info(),
253            FileStats::Markdown(stats) => stats.context_info(),
254            FileStats::Text(stats) => stats.context_info(),
255        }
256    }
257}
258
259/// Statistics for JSON files
260#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
261pub struct JsonStats {
262    /// Whether the root is an array
263    pub is_array: bool,
264    /// Length if it's an array
265    pub array_length: Option<usize>,
266    /// Top-level keys (for objects) or sample element keys (for arrays)
267    pub top_level_keys: Vec<String>,
268    /// Maximum nesting depth
269    pub nested_depth: usize,
270    /// Sample of unique values for interesting fields (max 5 values each)
271    pub unique_values_sample: HashMap<String, Vec<String>>,
272    /// Estimated cardinality for fields with many unique values
273    pub cardinality_estimates: HashMap<String, usize>,
274    /// Preview of first few elements/properties (as JSON string)
275    pub preview: String,
276}
277
278impl JsonStats {
279    pub fn summary(&self) -> String {
280        if self.is_array {
281            format!(
282                "JSON array with {} elements, {} unique keys, depth {}",
283                self.array_length.unwrap_or(0),
284                self.top_level_keys.len(),
285                self.nested_depth
286            )
287        } else {
288            format!(
289                "JSON object with {} keys, depth {}",
290                self.top_level_keys.len(),
291                self.nested_depth
292            )
293        }
294    }
295
296    pub fn context_info(&self) -> String {
297        let mut info = self.summary();
298
299        if !self.top_level_keys.is_empty() {
300            info.push_str(&format!("\nKeys: {}", self.top_level_keys.join(", ")));
301        }
302
303        // Highlight high-cardinality fields
304        let high_card_fields: Vec<_> = self
305            .cardinality_estimates
306            .iter()
307            .filter(|&(_, &count)| count > 50)
308            .map(|(field, count)| format!("{} (~{})", field, count))
309            .collect();
310
311        if !high_card_fields.is_empty() {
312            info.push_str(&format!(
313                "\nHigh-cardinality fields: {}",
314                high_card_fields.join(", ")
315            ));
316        }
317
318        // Show sample values for categorical fields
319        for (field, values) in &self.unique_values_sample {
320            if values.len() <= 10 {
321                // Only show for low-cardinality categorical fields
322                info.push_str(&format!("\n{}: {}", field, values.join(", ")));
323            }
324        }
325
326        info
327    }
328}
329
330/// Statistics for Markdown files
331#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
332pub struct MarkdownStats {
333    /// Word count
334    pub word_count: usize,
335    /// Headings with their text and level (1-6)
336    pub headings: Vec<HeadingInfo>,
337    /// Number of code blocks
338    pub code_blocks: usize,
339    /// Number of links
340    pub links: usize,
341    /// Number of images
342    pub images: usize,
343    /// Number of tables
344    pub tables: usize,
345    /// Number of lists
346    pub lists: usize,
347    /// YAML/TOML frontmatter type if present
348    pub front_matter: Option<String>,
349    /// Preview of first few lines
350    pub preview: String,
351}
352
353#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
354pub struct HeadingInfo {
355    pub text: String,
356    pub level: usize,
357}
358
359impl MarkdownStats {
360    pub fn summary(&self) -> String {
361        format!(
362            "Markdown: {} words, {} headings, {} code blocks, {} tables",
363            self.word_count,
364            self.headings.len(),
365            self.code_blocks,
366            self.tables
367        )
368    }
369
370    pub fn context_info(&self) -> String {
371        let mut info = self.summary();
372
373        if !self.headings.is_empty() {
374            info.push_str("\nStructure:");
375            for heading in &self.headings[..5.min(self.headings.len())] {
376                let indent = "  ".repeat(heading.level.saturating_sub(1));
377                info.push_str(&format!("\n{}{}", indent, heading.text));
378            }
379        }
380
381        if let Some(fm_type) = &self.front_matter {
382            info.push_str(&format!("\nFrontmatter: {}", fm_type));
383        }
384
385        info
386    }
387}
388
389/// Statistics for plain text files
390#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
391pub struct TextStats {
392    /// Number of lines
393    pub lines: usize,
394    /// Number of words
395    pub words: usize,
396    /// Number of characters
397    pub characters: usize,
398    /// Detected encoding
399    pub encoding: String,
400    /// Detected language (if any)
401    pub language: Option<String>,
402    /// Hints about the text structure
403    pub structure_hints: TextStructure,
404    /// Preview of first few lines
405    pub preview: String,
406}
407
408/// Detected structure patterns in text files
409#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
410#[serde(tag = "type", rename_all = "snake_case")]
411pub enum TextStructure {
412    LogFile {
413        log_level_counts: HashMap<String, usize>,
414    },
415    ConfigFile {
416        format: String,
417    },
418    CodeFile {
419        language: String,
420        function_count: usize,
421    },
422    PlainText,
423}
424
425impl TextStats {
426    pub fn summary(&self) -> String {
427        format!(
428            "Text: {} lines, {} words ({} chars)",
429            self.lines, self.words, self.characters
430        )
431    }
432
433    pub fn context_info(&self) -> String {
434        let mut info = self.summary();
435
436        if let Some(lang) = &self.language {
437            info.push_str(&format!("\nLanguage: {}", lang));
438        }
439
440        match &self.structure_hints {
441            TextStructure::LogFile { log_level_counts } => {
442                info.push_str("\nStructure: Log file");
443                let levels: Vec<_> = log_level_counts
444                    .iter()
445                    .map(|(level, count)| format!("{}: {}", level, count))
446                    .collect();
447                if !levels.is_empty() {
448                    info.push_str(&format!("\nLevels: {}", levels.join(", ")));
449                }
450            }
451            TextStructure::ConfigFile { format } => {
452                info.push_str(&format!("\nStructure: Config file ({})", format));
453            }
454            TextStructure::CodeFile {
455                language,
456                function_count,
457            } => {
458                info.push_str(&format!(
459                    "\nStructure: Code file ({}, {} functions)",
460                    language, function_count
461                ));
462            }
463            TextStructure::PlainText => {
464                info.push_str("\nStructure: Plain text");
465            }
466        }
467
468        info
469    }
470}
471
472/// Parameters for reading files with optional line ranges
473#[derive(Debug, Clone, Serialize, Deserialize, Default)]
474pub struct ReadParams {
475    pub start_line: Option<u64>,
476    pub end_line: Option<u64>,
477}
478
479/// File read result with content and metadata
480#[derive(Debug, Clone, Serialize, Deserialize)]
481pub struct FileReadResult {
482    pub content: String,
483    pub start_line: u64,
484    pub end_line: u64,
485    pub total_lines: u64,
486}
487
488/// Directory listing result
489#[derive(Debug, Clone, Serialize, Deserialize)]
490pub struct DirectoryListing {
491    pub path: String,
492    pub entries: Vec<DirectoryEntry>,
493}
494
495/// Directory entry information
496#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct DirectoryEntry {
498    pub name: String,
499    pub is_file: bool,
500    pub is_dir: bool,
501    pub size: Option<u64>,
502}
503
504/// Search result containing matches
505#[derive(Debug, Clone, Serialize, Deserialize)]
506pub struct SearchResult {
507    pub path: String,
508    pub matches: Vec<SearchMatch>,
509}
510
511/// Individual search match
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct SearchMatch {
514    pub file_path: String,
515    pub line_number: Option<u64>,
516    pub line_content: String,
517    pub match_text: String,
518}
519
520/// Legacy filesystem operations interface for tool compatibility
521#[async_trait]
522pub trait FileSystemOps: Send + Sync + std::fmt::Debug {
523    /// Read file with optional line range (includes line numbers)
524    async fn read(&self, path: &str, params: ReadParams) -> Result<FileReadResult>;
525
526    /// Read file as raw string without line numbers
527    async fn read_raw(&self, path: &str) -> Result<String> {
528        // Default implementation: use read() and strip line numbers
529        let result = self.read(path, ReadParams::default()).await?;
530        // Strip line number prefixes if present
531        if result.content.contains("→") {
532            Ok(result
533                .content
534                .lines()
535                .map(|line| {
536                    if let Some(pos) = line.find("→") {
537                        &line[pos + 1..]
538                    } else {
539                        line
540                    }
541                })
542                .collect::<Vec<_>>()
543                .join("\n"))
544        } else {
545            Ok(result.content)
546        }
547    }
548
549    /// Read file with line numbers and optional line range
550    async fn read_with_line_numbers(
551        &self,
552        path: &str,
553        params: ReadParams,
554    ) -> Result<FileReadResult> {
555        self.read(path, params).await
556    }
557
558    /// Write content to file
559    async fn write(&self, path: &str, content: &str) -> Result<()>;
560
561    /// List directory contents with metadata
562    async fn list(&self, path: &str) -> Result<DirectoryListing>;
563
564    /// Delete file or directory
565    async fn delete(&self, path: &str, recursive: bool) -> Result<()>;
566
567    /// Search files and content
568    async fn search(
569        &self,
570        path: &str,
571        content_pattern: Option<&str>,
572        file_pattern: Option<&str>,
573    ) -> Result<SearchResult>;
574
575    /// Copy file (legacy - should use shell commands)
576    async fn copy(&self, from: &str, to: &str) -> Result<()>;
577
578    /// Move file (legacy - should use shell commands)
579    async fn move_file(&self, from: &str, to: &str) -> Result<()>;
580
581    /// Create directory (legacy - directories created automatically)
582    async fn mkdir(&self, path: &str) -> Result<()>;
583
584    /// Get file metadata
585    async fn info(&self, path: &str) -> Result<FileMetadata>;
586
587    /// List directory tree (legacy - same as list)
588    async fn tree(&self, path: &str) -> Result<DirectoryListing>;
589}