distri-types 0.3.8

Shared message, tool, and config types for Distri
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
use anyhow::Result;
use async_trait::async_trait;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Context information for file storage operations.
///
/// Groups the identifiers (thread, task, tool call) and the content hints
/// (mime type, original filename) that accompany a piece of stored content.
/// Only `thread_id` is mandatory; every other field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileContext {
    /// Thread ID for organizing files
    pub thread_id: String,
    /// Task ID if available
    pub task_id: Option<String>,
    /// Tool call ID that generated this content
    pub tool_call_id: Option<String>,
    /// Content type/mime type hint
    pub content_type: Option<String>,
    /// Original filename if content represents a file
    pub original_filename: Option<String>,
}

/// Pure filesystem metadata about a file - no artifact context.
///
/// Describes the stored file itself (identity, location, size, timestamps and
/// optional content statistics). Thread/task association is intentionally not
/// part of this type.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
pub struct FileMetadata {
    /// Unique file ID
    pub file_id: String,
    /// Relative path from filesystem base
    pub relative_path: String,
    /// File size in bytes
    pub size: u64,
    /// Content type/mime type if known
    pub content_type: Option<String>,
    /// Original filename if available
    pub original_filename: Option<String>,
    /// When the file was created (UTC).
    /// The generated JSON schema describes this field as a plain string.
    #[schemars(with = "String")]
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// When the file was last modified (UTC).
    /// The generated JSON schema describes this field as a plain string.
    #[schemars(with = "String")]
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// File checksum for integrity verification.
    /// NOTE(review): the checksum algorithm/encoding is not defined here — confirm with the producer.
    pub checksum: Option<String>,
    /// Rich statistics about the file content
    pub stats: Option<FileStats>,
    /// Short preview of the content for context
    pub preview: Option<String>,
}

/// Artifact metadata that combines filesystem metadata with context information.
///
/// Wraps a [`FileMetadata`] together with the thread, task and tool call the
/// file is associated with.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
pub struct Artifact {
    /// Filesystem metadata
    pub file_metadata: FileMetadata,
    /// Associated thread ID
    pub thread_id: String,
    /// Associated task ID if available
    pub task_id: Option<String>,
    /// Tool call ID that generated this artifact
    pub tool_call_id: Option<String>,
}

impl FileMetadata {
    /// Get the full display name for this file.
    ///
    /// Returns `original_filename` when it is set. Otherwise returns
    /// `file_<prefix>`, where `<prefix>` is the first 8 bytes of `file_id`,
    /// shortened as needed so that it never exceeds the ID's length and never
    /// cuts a UTF-8 character in half. This method does not panic for short,
    /// empty or non-ASCII IDs.
    pub fn display_name(&self) -> String {
        if let Some(name) = &self.original_filename {
            return name.clone();
        }

        let id = self.file_id.as_str();
        // Clamp to the ID length, then back off to the nearest char boundary;
        // index 0 is always a boundary, so the loop terminates.
        let mut end = id.len().min(8);
        while !id.is_char_boundary(end) {
            end -= 1;
        }
        format!("file_{}", &id[..end])
    }

    /// Get a human readable size string.
    ///
    /// Sizes below 1024 bytes are printed exactly (`"512B"`); larger sizes are
    /// scaled by powers of 1024 and printed with one decimal place using the
    /// suffixes `KB`, `MB` and `GB` (`GB` is the largest unit used).
    pub fn size_display(&self) -> String {
        const KIB: f64 = 1024.0;
        const MIB: f64 = 1024.0 * 1024.0;
        const GIB: f64 = 1024.0 * 1024.0 * 1024.0;

        let size = self.size as f64;
        if size < KIB {
            format!("{}B", self.size)
        } else if size < MIB {
            format!("{:.1}KB", size / KIB)
        } else if size < GIB {
            format!("{:.1}MB", size / MIB)
        } else {
            format!("{:.1}GB", size / GIB)
        }
    }

    /// Check if this appears to be a text file.
    ///
    /// True when `content_type` starts with `text/` or contains `json` or
    /// `xml`; false otherwise, including when no content type is known.
    pub fn is_text_file(&self) -> bool {
        match self.content_type.as_deref() {
            Some(ct) => ct.starts_with("text/") || ct.contains("json") || ct.contains("xml"),
            None => false,
        }
    }

    /// Get a summary line for display.
    ///
    /// Format: `<display name> (<size>)`, or `<display name> (<size>, <content type>)`
    /// when a content type is known.
    pub fn summary(&self) -> String {
        let content_type_suffix = match &self.content_type {
            Some(ct) => format!(", {}", ct),
            None => String::new(),
        };
        format!(
            "{} ({}{})",
            self.display_name(),
            self.size_display(),
            content_type_suffix
        )
    }
}

impl Artifact {
    /// Create a new artifact with file metadata and context
    pub fn new(
        file_metadata: FileMetadata,
        thread_id: String,
        task_id: Option<String>,
        tool_call_id: Option<String>,
    ) -> Self {
        Self {
            file_metadata,
            thread_id,
            task_id,
            tool_call_id,
        }
    }

    /// Get the artifact path in the namespaced format
    pub fn artifact_path(&self) -> String {
        if let Some(task_id) = &self.task_id {
            format!(
                "{}/artifact/{}/{}",
                self.thread_id, task_id, self.file_metadata.file_id
            )
        } else {
            format!("{}/artifact/{}", self.thread_id, self.file_metadata.file_id)
        }
    }

    /// Delegate display methods to file metadata
    pub fn display_name(&self) -> String {
        self.file_metadata.display_name()
    }

    pub fn size_display(&self) -> String {
        self.file_metadata.size_display()
    }

    pub fn summary(&self) -> String {
        self.file_metadata.summary()
    }
}

/// Artifact namespace for organizing artifacts by thread and task.
/// Handles path creation logic consistently across the codebase.
///
/// A namespace is identified by a required thread ID and an optional task ID;
/// the type is hashable and comparable, so it can serve as a map key.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash)]
pub struct ArtifactNamespace {
    /// Thread ID (required)
    pub thread_id: String,
    /// Task ID (optional - if None, uses thread-level namespace)
    pub task_id: Option<String>,
}

impl ArtifactNamespace {
    /// Create a new namespace from thread_id and optional task_id
    pub fn new(thread_id: String, task_id: Option<String>) -> Self {
        Self { thread_id, task_id }
    }

    /// Convert ID to short hex (8 chars like git commits)
    fn short_hex(id: &str) -> String {
        let mut hasher = DefaultHasher::new();
        id.hash(&mut hasher);
        format!("{:08x}", hasher.finish())
    }

    /// Get the thread-level namespace path: `threads/{short_thread}`
    pub fn thread_path(&self) -> String {
        let short_thread = Self::short_hex(&self.thread_id);
        format!("threads/{}", short_thread)
    }

    /// Get the task-level namespace path: `threads/{short_thread}/tasks/{short_task}`
    /// Returns None if task_id is not set
    pub fn task_path(&self) -> Option<String> {
        self.task_id.as_ref().map(|task_id| {
            let short_thread = Self::short_hex(&self.thread_id);
            let short_task = Self::short_hex(task_id);
            format!("threads/{}/tasks/{}", short_thread, short_task)
        })
    }

    /// Get the primary namespace path (task-level if available, otherwise thread-level)
    /// This is the path where artifacts should be saved
    pub fn primary_path(&self) -> String {
        self.task_path().unwrap_or_else(|| self.thread_path())
    }

    /// Get all paths that should be checked when listing artifacts
    /// Returns both thread-level and task-level paths (if task_id is set)
    /// This ensures list_artifacts can find artifacts saved at either level
    pub fn all_paths(&self) -> Vec<String> {
        let mut paths = vec![self.thread_path()];
        if let Some(task_path) = self.task_path() {
            paths.push(task_path);
        }
        paths
    }

    /// Parse a namespace path back into thread_id and task_id
    /// Handles both `threads/{hash}` and `threads/{hash}/tasks/{hash}` formats
    /// Note: This cannot reverse the hash to get the original UUIDs, so it returns None
    /// In practice, you should store the mapping or use the namespace directly
    pub fn from_path(_path: &str) -> Option<Self> {
        // We can't reverse the hash, so we return None
        // In practice, we'd need to store the mapping or use the namespace directly
        None
    }
}

/// Type-specific file statistics that provide rich metadata about file content.
///
/// Serialized with a `type` tag whose value is the snake_case variant name
/// (`json`, `markdown`, `text`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum FileStats {
    /// Statistics for JSON content
    Json(JsonStats),
    /// Statistics for Markdown content
    Markdown(MarkdownStats),
    /// Statistics for plain text content
    Text(TextStats),
}

impl FileStats {
    /// Get the type of file stats as a string
    pub fn stats_type(&self) -> &'static str {
        match self {
            FileStats::Json(_) => "json",
            FileStats::Markdown(_) => "markdown",
            FileStats::Text(_) => "text",
        }
    }

    /// Extract a human-readable summary of the file stats
    pub fn summary(&self) -> String {
        match self {
            FileStats::Json(stats) => stats.summary(),
            FileStats::Markdown(stats) => stats.summary(),
            FileStats::Text(stats) => stats.summary(),
        }
    }

    /// Get context information suitable for agent prompts
    pub fn context_info(&self) -> String {
        match self {
            FileStats::Json(stats) => stats.context_info(),
            FileStats::Markdown(stats) => stats.context_info(),
            FileStats::Text(stats) => stats.context_info(),
        }
    }
}

/// Statistics for JSON files.
///
/// Describes the shape of a JSON document (array vs. object, keys, depth)
/// plus sampled values and cardinality estimates for its fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct JsonStats {
    /// Whether the root is an array
    pub is_array: bool,
    /// Length if it's an array
    pub array_length: Option<usize>,
    /// Top-level keys (for objects) or sample element keys (for arrays)
    pub top_level_keys: Vec<String>,
    /// Maximum nesting depth
    pub nested_depth: usize,
    /// Sample of unique values for interesting fields (max 5 values each),
    /// keyed by field name
    pub unique_values_sample: HashMap<String, Vec<String>>,
    /// Estimated cardinality for fields with many unique values,
    /// keyed by field name
    pub cardinality_estimates: HashMap<String, usize>,
    /// Preview of first few elements/properties (as JSON string)
    pub preview: String,
}

impl JsonStats {
    /// One-line description of the JSON document's shape.
    ///
    /// Arrays report element count (0 when `array_length` is unset), number of
    /// keys and depth; objects report number of keys and depth.
    pub fn summary(&self) -> String {
        if self.is_array {
            format!(
                "JSON array with {} elements, {} unique keys, depth {}",
                self.array_length.unwrap_or(0),
                self.top_level_keys.len(),
                self.nested_depth
            )
        } else {
            format!(
                "JSON object with {} keys, depth {}",
                self.top_level_keys.len(),
                self.nested_depth
            )
        }
    }

    /// Multi-line context text suitable for agent prompts.
    ///
    /// Starts with [`summary`](Self::summary), then appends (each on its own line):
    /// - `Keys: …` when there are top-level keys,
    /// - `High-cardinality fields: …` for fields whose estimate exceeds 50,
    /// - one `field: v1, v2, …` line per sampled field with at most 10 values.
    ///
    /// Field lists are sorted by field name so the output is identical for
    /// identical stats; iterating the maps directly would yield an arbitrary,
    /// run-dependent order.
    pub fn context_info(&self) -> String {
        let mut info = self.summary();

        if !self.top_level_keys.is_empty() {
            info.push_str(&format!("\nKeys: {}", self.top_level_keys.join(", ")));
        }

        // Highlight high-cardinality fields, in a stable (alphabetical) order.
        let mut high_card: Vec<(&String, &usize)> = self
            .cardinality_estimates
            .iter()
            .filter(|&(_, &count)| count > 50)
            .collect();
        high_card.sort_unstable_by(|a, b| a.0.cmp(b.0));

        if !high_card.is_empty() {
            let rendered: Vec<String> = high_card
                .iter()
                .map(|(field, count)| format!("{} (~{})", field, count))
                .collect();
            info.push_str(&format!(
                "\nHigh-cardinality fields: {}",
                rendered.join(", ")
            ));
        }

        // Show sample values only for low-cardinality categorical fields,
        // again in alphabetical order.
        let mut samples: Vec<(&String, &Vec<String>)> = self
            .unique_values_sample
            .iter()
            .filter(|(_, values)| values.len() <= 10)
            .collect();
        samples.sort_unstable_by(|a, b| a.0.cmp(b.0));

        for (field, values) in samples {
            info.push_str(&format!("\n{}: {}", field, values.join(", ")));
        }

        info
    }
}

/// Statistics for Markdown files.
///
/// Counts of structural elements (headings, code blocks, links, ...) plus a
/// short preview of the document.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct MarkdownStats {
    /// Word count
    pub word_count: usize,
    /// Headings with their text and level (1-6)
    pub headings: Vec<HeadingInfo>,
    /// Number of code blocks
    pub code_blocks: usize,
    /// Number of links
    pub links: usize,
    /// Number of images
    pub images: usize,
    /// Number of tables
    pub tables: usize,
    /// Number of lists
    pub lists: usize,
    /// YAML/TOML frontmatter type if present
    pub front_matter: Option<String>,
    /// Preview of first few lines
    pub preview: String,
}

/// A single Markdown heading: its text and its level.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct HeadingInfo {
    /// Heading text
    pub text: String,
    /// Heading level; the level is used to compute display indentation
    /// (level 1 is not indented)
    pub level: usize,
}

impl MarkdownStats {
    /// One-line overview: word, heading, code-block and table counts.
    pub fn summary(&self) -> String {
        let heading_count = self.headings.len();
        format!(
            "Markdown: {} words, {} headings, {} code blocks, {} tables",
            self.word_count, heading_count, self.code_blocks, self.tables
        )
    }

    /// Multi-line context text suitable for agent prompts.
    ///
    /// Starts with [`summary`](Self::summary). When headings exist, a
    /// `Structure:` section lists at most the first five, each indented by two
    /// spaces per level below the first. A `Frontmatter:` line follows when a
    /// frontmatter type is recorded.
    pub fn context_info(&self) -> String {
        let mut out = self.summary();

        if !self.headings.is_empty() {
            out.push_str("\nStructure:");
            // Only the leading headings are shown to keep the outline short.
            for heading in self.headings.iter().take(5) {
                let depth = heading.level.saturating_sub(1);
                out.push('\n');
                out.push_str(&"  ".repeat(depth));
                out.push_str(&heading.text);
            }
        }

        if let Some(kind) = self.front_matter.as_ref() {
            out.push_str("\nFrontmatter: ");
            out.push_str(kind);
        }

        out
    }
}

/// Statistics for plain text files.
///
/// Basic size counts plus detected encoding/language and a structural
/// classification of the text.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct TextStats {
    /// Number of lines
    pub lines: usize,
    /// Number of words
    pub words: usize,
    /// Number of characters
    pub characters: usize,
    /// Detected encoding
    pub encoding: String,
    /// Detected language (if any)
    pub language: Option<String>,
    /// Hints about the text structure
    pub structure_hints: TextStructure,
    /// Preview of first few lines
    pub preview: String,
}

/// Detected structure patterns in text files.
///
/// Serialized with a `type` tag whose value is the snake_case variant name
/// (`log_file`, `config_file`, `code_file`, `plain_text`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TextStructure {
    /// Log output, with a count per log level
    LogFile {
        log_level_counts: HashMap<String, usize>,
    },
    /// Configuration file in the named format
    ConfigFile {
        format: String,
    },
    /// Source code in the named language, with a function count
    CodeFile {
        language: String,
        function_count: usize,
    },
    /// No specific structure detected
    PlainText,
}

impl TextStats {
    /// One-line overview: line, word and character counts.
    pub fn summary(&self) -> String {
        format!(
            "Text: {} lines, {} words ({} chars)",
            self.lines, self.words, self.characters
        )
    }

    /// Multi-line context text suitable for agent prompts.
    ///
    /// Starts with [`summary`](Self::summary), adds a `Language:` line when a
    /// language was detected, then a `Structure:` line describing the detected
    /// structure. For log files, a `Levels:` line lists the per-level counts
    /// sorted by level name, so identical stats always render identically;
    /// iterating the map directly would yield an arbitrary order.
    pub fn context_info(&self) -> String {
        let mut info = self.summary();

        if let Some(lang) = &self.language {
            info.push_str(&format!("\nLanguage: {}", lang));
        }

        match &self.structure_hints {
            TextStructure::LogFile { log_level_counts } => {
                info.push_str("\nStructure: Log file");

                // Sort by level name for a stable, reproducible listing.
                let mut counts: Vec<(&String, &usize)> = log_level_counts.iter().collect();
                counts.sort_unstable_by(|a, b| a.0.cmp(b.0));

                if !counts.is_empty() {
                    let levels: Vec<String> = counts
                        .iter()
                        .map(|(level, count)| format!("{}: {}", level, count))
                        .collect();
                    info.push_str(&format!("\nLevels: {}", levels.join(", ")));
                }
            }
            TextStructure::ConfigFile { format } => {
                info.push_str(&format!("\nStructure: Config file ({})", format));
            }
            TextStructure::CodeFile {
                language,
                function_count,
            } => {
                info.push_str(&format!(
                    "\nStructure: Code file ({}, {} functions)",
                    language, function_count
                ));
            }
            TextStructure::PlainText => {
                info.push_str("\nStructure: Plain text");
            }
        }

        info
    }
}

/// Parameters for reading files with optional line ranges.
///
/// Both bounds are optional; the `Default` value leaves both unset.
/// NOTE(review): whether line numbers are 0- or 1-based and whether
/// `end_line` is inclusive is not visible here — confirm against implementations.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReadParams {
    /// First line to read, if restricted
    pub start_line: Option<u64>,
    /// Last line to read, if restricted
    pub end_line: Option<u64>,
}

/// File read result with content and metadata.
///
/// NOTE(review): line-number base and inclusivity of the range fields are not
/// visible here — confirm against implementations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileReadResult {
    /// The content that was read
    pub content: String,
    /// Start of the line range covered by `content`
    pub start_line: u64,
    /// End of the line range covered by `content`
    pub end_line: u64,
    /// Total number of lines in the file
    pub total_lines: u64,
}

/// Directory listing result: a path together with its entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryListing {
    /// Path that was listed
    pub path: String,
    /// Entries found under `path`
    pub entries: Vec<DirectoryEntry>,
}

/// Directory entry information.
///
/// NOTE(review): `is_file` and `is_dir` are independent flags; presumably
/// exactly one is true for ordinary entries — confirm against implementations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryEntry {
    /// Entry name
    pub name: String,
    /// Whether the entry is a file
    pub is_file: bool,
    /// Whether the entry is a directory
    pub is_dir: bool,
    /// Size if available (presumably in bytes — confirm)
    pub size: Option<u64>,
}

/// Search result containing matches.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path associated with the search (presumably the search root — confirm)
    pub path: String,
    /// Matches found by the search
    pub matches: Vec<SearchMatch>,
}

/// Individual search match.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchMatch {
    /// Path of the file containing the match
    pub file_path: String,
    /// Line number of the match, when one is available
    pub line_number: Option<u64>,
    /// Content of the matching line
    pub line_content: String,
    /// The text that matched
    pub match_text: String,
}

/// Legacy filesystem operations interface for tool compatibility.
///
/// Implementors must be `Send + Sync + Debug`. Every operation is async and
/// reports failure through `anyhow::Result`. `read_raw` and
/// `read_with_line_numbers` have default implementations built on `read`.
#[async_trait]
pub trait FileSystemOps: Send + Sync + std::fmt::Debug {
    /// Read file with optional line range (includes line numbers)
    async fn read(&self, path: &str, params: ReadParams) -> Result<FileReadResult>;

    /// Read file as raw string without line numbers.
    ///
    /// Default implementation: calls `read()` with default parameters and, if
    /// the content contains the line-number delimiter, removes everything up
    /// to and including the first delimiter on each line. Lines without the
    /// delimiter are kept unchanged. Lines are re-joined with `\n`, so a
    /// trailing newline is not preserved in that case.
    async fn read_raw(&self, path: &str) -> Result<String> {
        // NOTE(review): this delimiter literal is empty in the source as
        // received; it looks like the separator `read()` puts between the line
        // number and the line text was lost. With an empty delimiter the
        // content is returned with every line unchanged. Restore the real
        // separator here once confirmed against `read()` implementations.
        const LINE_NUMBER_DELIMITER: &str = "";

        let result = self.read(path, ReadParams::default()).await?;
        if !result.content.contains(LINE_NUMBER_DELIMITER) {
            return Ok(result.content);
        }

        let stripped = result
            .content
            .lines()
            .map(|line| match line.find(LINE_NUMBER_DELIMITER) {
                // Skip the whole delimiter (it may be empty or multi-byte);
                // `pos + len` is the end of the match, hence a valid char
                // boundary, so this slice cannot panic.
                Some(pos) => &line[pos + LINE_NUMBER_DELIMITER.len()..],
                None => line,
            })
            .collect::<Vec<_>>()
            .join("\n");
        Ok(stripped)
    }

    /// Read file with line numbers and optional line range.
    ///
    /// Default implementation forwards to `read()`.
    async fn read_with_line_numbers(
        &self,
        path: &str,
        params: ReadParams,
    ) -> Result<FileReadResult> {
        self.read(path, params).await
    }

    /// Write content to file
    async fn write(&self, path: &str, content: &str) -> Result<()>;

    /// List directory contents with metadata
    async fn list(&self, path: &str) -> Result<DirectoryListing>;

    /// Delete file or directory
    async fn delete(&self, path: &str, recursive: bool) -> Result<()>;

    /// Search files and content.
    ///
    /// `content_pattern` and `file_pattern` are both optional filters.
    /// NOTE(review): pattern syntax (glob/regex/substring) is not visible
    /// here — confirm against implementations.
    async fn search(
        &self,
        path: &str,
        content_pattern: Option<&str>,
        file_pattern: Option<&str>,
    ) -> Result<SearchResult>;

    /// Copy file (legacy - should use shell commands)
    async fn copy(&self, from: &str, to: &str) -> Result<()>;

    /// Move file (legacy - should use shell commands)
    async fn move_file(&self, from: &str, to: &str) -> Result<()>;

    /// Create directory (legacy - directories created automatically)
    async fn mkdir(&self, path: &str) -> Result<()>;

    /// Get file metadata
    async fn info(&self, path: &str) -> Result<FileMetadata>;

    /// List directory tree (legacy - same as list)
    async fn tree(&self, path: &str) -> Result<DirectoryListing>;
}