blz_core/
storage.rs

1use crate::{Error, LlmsJson, Result, Source, SourceDescriptor, profile};
2use chrono::Utc;
3use directories::{BaseDirs, ProjectDirs};
4use std::fs;
5use std::path::{Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// Maximum allowed alias length, kept in sync with the CLI's alias
/// validation so both layers accept exactly the same set of names.
const MAX_ALIAS_LEN: usize = 64;

/// Local filesystem storage for cached llms.txt documentation.
///
/// `root_dir` holds per-source data (content, parsed JSON, index, archives);
/// `config_dir` holds source descriptor TOML files.
pub struct Storage {
    // Data root; per-source directories live under `<root_dir>/sources/`.
    root_dir: PathBuf,
    // Configuration root; descriptors live under `<config_dir>/sources/`.
    config_dir: PathBuf,
}
16
17impl Storage {
18    fn sanitize_variant_file_name(name: &str) -> String {
19        // Only allow a conservative set of filename characters to avoid
20        // accidentally writing outside the tool directory or producing
21        // surprising paths. Anything else becomes an underscore so that the
22        // resulting filename stays predictable and safe to use across
23        // platforms.
24        let mut sanitized: String = name
25            .chars()
26            .map(|c| {
27                if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
28                    c
29                } else {
30                    '_'
31                }
32            })
33            .collect();
34
35        // Collapse any ".." segments that could be introduced either by the
36        // caller or by the substitution above. This keeps the path rooted at
37        // the alias directory even if callers pass traversal attempts.
38        while sanitized.contains("..") {
39            sanitized = sanitized.replace("..", "_");
40        }
41
42        if sanitized.is_empty() {
43            "llms.txt".to_string()
44        } else {
45            sanitized
46        }
47    }
48
49    // Storage uses consistent filenames regardless of source URL:
50    // - llms.txt for content (even if fetched from llms-full.txt)
51    // - llms.json for parsed data
52    // - metadata.json for source metadata
53
54    /// Creates a new storage instance with the default root directory
55    pub fn new() -> Result<Self> {
56        // Test/dev override: allow BLZ_DATA_DIR to set the root directory explicitly
57        if let Ok(dir) = std::env::var("BLZ_DATA_DIR") {
58            let root = PathBuf::from(dir);
59            let config_dir = Self::default_config_dir()?;
60            return Self::with_paths(root, config_dir);
61        }
62
63        // Use XDG_DATA_HOME if explicitly set
64        let root_dir = if let Ok(xdg) = std::env::var("XDG_DATA_HOME") {
65            let trimmed = xdg.trim();
66            if trimmed.is_empty() {
67                Self::fallback_data_dir()?
68            } else {
69                PathBuf::from(trimmed).join(profile::app_dir_slug())
70            }
71        } else {
72            Self::fallback_data_dir()?
73        };
74
75        // Check for migration from old cache directory
76        Self::check_and_migrate_old_cache(&root_dir);
77
78        let config_dir = Self::default_config_dir()?;
79        Self::with_paths(root_dir, config_dir)
80    }
81
82    /// Fallback data directory when `XDG_DATA_HOME` is not set
83    fn fallback_data_dir() -> Result<PathBuf> {
84        // Use ~/.blz/ for data (same location as config for non-XDG systems)
85        let home = directories::BaseDirs::new()
86            .ok_or_else(|| Error::Storage("Failed to determine home directory".into()))?;
87        Ok(home.home_dir().join(profile::dot_dir_slug()))
88    }
89
90    /// Determine the default configuration directory honoring overrides
91    fn default_config_dir() -> Result<PathBuf> {
92        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
93            let trimmed = dir.trim();
94            if !trimmed.is_empty() {
95                return Ok(PathBuf::from(trimmed));
96            }
97        }
98
99        if let Ok(dir) = std::env::var("BLZ_GLOBAL_CONFIG_DIR") {
100            let trimmed = dir.trim();
101            if !trimmed.is_empty() {
102                return Ok(PathBuf::from(trimmed));
103            }
104        }
105
106        if let Ok(xdg) = std::env::var("XDG_CONFIG_HOME") {
107            let trimmed = xdg.trim();
108            if !trimmed.is_empty() {
109                return Ok(PathBuf::from(trimmed).join(profile::app_dir_slug()));
110            }
111        }
112
113        if let Some(base_dirs) = BaseDirs::new() {
114            return Ok(base_dirs.home_dir().join(profile::dot_dir_slug()));
115        }
116
117        Err(Error::Storage(
118            "Failed to determine configuration directory".into(),
119        ))
120    }
121
122    /// Creates a new storage instance with a custom root directory
123    pub fn with_root(root_dir: PathBuf) -> Result<Self> {
124        let config_dir = root_dir.join("config");
125        Self::with_paths(root_dir, config_dir)
126    }
127
128    /// Creates a new storage instance with explicit data and config directories
129    pub fn with_paths(root_dir: PathBuf, config_dir: PathBuf) -> Result<Self> {
130        fs::create_dir_all(&root_dir)
131            .map_err(|e| Error::Storage(format!("Failed to create root directory: {e}")))?;
132        fs::create_dir_all(&config_dir)
133            .map_err(|e| Error::Storage(format!("Failed to create config directory: {e}")))?;
134
135        Ok(Self {
136            root_dir,
137            config_dir,
138        })
139    }
140
    /// Returns the root data directory path.
    #[must_use]
    pub fn root_dir(&self) -> &Path {
        &self.root_dir
    }

    /// Returns the root configuration directory path used for descriptors.
    #[must_use]
    pub fn config_dir(&self) -> &Path {
        &self.config_dir
    }

    // Directory holding one `<alias>.toml` descriptor per source.
    fn descriptors_dir(&self) -> PathBuf {
        self.config_dir.join("sources")
    }

    /// Returns the path to the descriptor TOML for a source.
    ///
    /// # Errors
    ///
    /// Returns an error if `alias` fails `validate_alias` (empty, traversal
    /// characters, invalid charset, too long, etc.).
    pub fn descriptor_path(&self, alias: &str) -> Result<PathBuf> {
        Self::validate_alias(alias)?;
        Ok(self.descriptors_dir().join(format!("{alias}.toml")))
    }
162
163    /// Persist a descriptor to disk, creating parent directories if necessary
164    pub fn save_descriptor(&self, descriptor: &SourceDescriptor) -> Result<()> {
165        let path = self.descriptor_path(&descriptor.alias)?;
166        if let Some(parent) = path.parent() {
167            fs::create_dir_all(parent)
168                .map_err(|e| Error::Storage(format!("Failed to create descriptor dir: {e}")))?;
169        }
170
171        let toml = toml::to_string_pretty(descriptor)
172            .map_err(|e| Error::Storage(format!("Failed to serialize descriptor: {e}")))?;
173        fs::write(&path, toml)
174            .map_err(|e| Error::Storage(format!("Failed to write descriptor: {e}")))?;
175        Ok(())
176    }
177
178    /// Load a descriptor if it exists
179    pub fn load_descriptor(&self, alias: &str) -> Result<Option<SourceDescriptor>> {
180        let path = self.descriptor_path(alias)?;
181        if !path.exists() {
182            return Ok(None);
183        }
184
185        let contents = fs::read_to_string(&path)
186            .map_err(|e| Error::Storage(format!("Failed to read descriptor: {e}")))?;
187        let descriptor = toml::from_str::<SourceDescriptor>(&contents)
188            .map_err(|e| Error::Storage(format!("Failed to parse descriptor: {e}")))?;
189        Ok(Some(descriptor))
190    }
191
192    /// Remove descriptor file for an alias if present
193    pub fn remove_descriptor(&self, alias: &str) -> Result<()> {
194        let path = self.descriptor_path(alias)?;
195        if path.exists() {
196            fs::remove_file(&path)
197                .map_err(|e| Error::Storage(format!("Failed to remove descriptor: {e}")))?;
198        }
199        Ok(())
200    }
201
202    /// Returns the directory path for a given alias
203    pub fn tool_dir(&self, source: &str) -> Result<PathBuf> {
204        // Validate alias to prevent directory traversal attacks
205        Self::validate_alias(source)?;
206        Ok(self.root_dir.join("sources").join(source))
207    }
208
209    /// Resolve the on-disk path for a specific flavored content file.
210    fn variant_file_path(&self, source: &str, file_name: &str) -> Result<PathBuf> {
211        let sanitized = Self::sanitize_variant_file_name(file_name);
212        Ok(self.tool_dir(source)?.join(sanitized))
213    }
214
215    /// Ensures the directory for an alias exists and returns its path
216    pub fn ensure_tool_dir(&self, source: &str) -> Result<PathBuf> {
217        let dir = self.tool_dir(source)?;
218        fs::create_dir_all(&dir)
219            .map_err(|e| Error::Storage(format!("Failed to create tool directory: {e}")))?;
220        Ok(dir)
221    }
222
    /// Validate that an alias is safe to use as a directory name.
    ///
    /// The rules mirror the CLI's alias constraints so the two layers never
    /// disagree about what is storable: non-empty, no leading `-` or `.`, no
    /// `..`/`/`/`\\`, no NUL, at most `MAX_ALIAS_LEN` bytes, charset limited
    /// to `[A-Za-z0-9_-]`, and (on Windows) not a reserved device name.
    ///
    /// Note: checks run in a fixed order, so an alias violating several rules
    /// reports the first matching error message.
    fn validate_alias(alias: &str) -> Result<()> {
        // Check for empty alias
        if alias.is_empty() {
            return Err(Error::Storage("Alias cannot be empty".into()));
        }

        // Disallow leading hyphen to avoid CLI parsing ambiguities
        // (an alias like "-foo" would look like a command-line flag).
        if alias.starts_with('-') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': cannot start with '-'"
            )));
        }

        // Check for path traversal attempts (either separator style).
        if alias.contains("..") || alias.contains('/') || alias.contains('\\') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': contains path traversal characters"
            )));
        }

        // Reject hidden-file names and embedded NUL bytes.
        if alias.starts_with('.') || alias.contains('\0') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': contains invalid filesystem characters"
            )));
        }

        // Check for reserved device names on Windows (case-insensitive).
        #[cfg(target_os = "windows")]
        {
            const RESERVED_NAMES: &[&str] = &[
                "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
                "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
                "LPT9",
            ];

            let upper_alias = alias.to_uppercase();
            if RESERVED_NAMES.contains(&upper_alias.as_str()) {
                return Err(Error::Storage(format!(
                    "Invalid alias '{}': reserved name on Windows",
                    alias
                )));
            }
        }

        // Check byte length (keep consistent with CLI policy).
        if alias.len() > MAX_ALIAS_LEN {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': exceeds maximum length of {MAX_ALIAS_LEN} characters"
            )));
        }

        // Only allow ASCII alphanumeric, dash, underscore.
        if !alias
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
        {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': only [A-Za-z0-9_-] are allowed"
            )));
        }

        Ok(())
    }
291
    /// Returns the path to the llms.txt file for a source.
    ///
    /// # Errors
    ///
    /// Returns an error if the alias fails validation.
    pub fn llms_txt_path(&self, source: &str) -> Result<PathBuf> {
        self.variant_file_path(source, "llms.txt")
    }

    /// Returns the path to the llms.json file for a source.
    ///
    /// # Errors
    ///
    /// Returns an error if the alias fails validation.
    pub fn llms_json_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("llms.json"))
    }

    /// Returns the path to the search index directory for a source.
    /// Dot-prefixed so `list_sources` and `archive` skip it.
    pub fn index_dir(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join(".index"))
    }

    /// Returns the path to the archive directory for a source.
    pub fn archive_dir(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join(".archive"))
    }

    /// Returns the path to the metadata file for a source.
    pub fn metadata_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("metadata.json"))
    }

    /// Returns the path to the anchors mapping file for a source.
    pub fn anchors_map_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("anchors.json"))
    }
321
322    /// Saves the llms.txt content for a source
323    pub fn save_llms_txt(&self, source: &str, content: &str) -> Result<()> {
324        self.ensure_tool_dir(source)?;
325        let path = self.llms_txt_path(source)?;
326
327        let tmp_path = path.with_extension("tmp");
328        fs::write(&tmp_path, content)
329            .map_err(|e| Error::Storage(format!("Failed to write llms.txt: {e}")))?;
330
331        #[cfg(target_os = "windows")]
332        if path.exists() {
333            fs::remove_file(&path)
334                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.txt: {e}")))?;
335        }
336
337        fs::rename(&tmp_path, &path)
338            .map_err(|e| Error::Storage(format!("Failed to commit llms.txt: {e}")))?;
339
340        debug!("Saved llms.txt for {}", source);
341        Ok(())
342    }
343
344    /// Loads the llms.txt content for a source
345    pub fn load_llms_txt(&self, source: &str) -> Result<String> {
346        let path = self.llms_txt_path(source)?;
347        fs::read_to_string(&path)
348            .map_err(|e| Error::Storage(format!("Failed to read llms.txt: {e}")))
349    }
350
    /// Saves the parsed llms.json data for a source atomically.
    ///
    /// Serializes to pretty JSON, stages it in `llms.json.tmp`, then renames
    /// into place so readers never see a truncated file.
    ///
    /// # Errors
    ///
    /// Returns an error if the alias is invalid, serialization fails, or any
    /// filesystem operation fails.
    pub fn save_llms_json(&self, source: &str, data: &LlmsJson) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.llms_json_path(source)?;
        let json = serde_json::to_string_pretty(data)
            .map_err(|e| Error::Storage(format!("Failed to serialize JSON: {e}")))?;

        let tmp_path = path.with_extension("json.tmp");
        fs::write(&tmp_path, json)
            .map_err(|e| Error::Storage(format!("Failed to write llms.json: {e}")))?;

        // Windows cannot rename over an existing file; remove it first.
        #[cfg(target_os = "windows")]
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.json: {e}")))?;
        }
        fs::rename(&tmp_path, &path)
            .map_err(|e| Error::Storage(format!("Failed to commit llms.json: {e}")))?;

        debug!("Saved llms.json for {}", source);
        Ok(())
    }
373
    /// Loads the parsed llms.json data for a source.
    ///
    /// Detects caches written by blz v0.4.x (which used an `alias` field, or
    /// a `source` field holding an object rather than a string) and returns a
    /// descriptive error telling the user to clear and re-add, instead of a
    /// raw deserialization failure.
    ///
    /// # Errors
    ///
    /// Returns an error if the file is missing, unreadable, in the old
    /// incompatible format, or fails to parse.
    pub fn load_llms_json(&self, source: &str) -> Result<LlmsJson> {
        let path = self.llms_json_path(source)?;
        if !path.exists() {
            return Err(Error::Storage(format!(
                "llms.json missing for source '{source}'"
            )));
        }
        let json = fs::read_to_string(&path)
            .map_err(|e| Error::Storage(format!("Failed to read llms.json: {e}")))?;

        // Try to detect old v0.4.x format before attempting a typed parse, so
        // the user gets actionable guidance instead of a serde error.
        if let Ok(raw_value) = serde_json::from_str::<serde_json::Value>(&json) {
            if let Some(obj) = raw_value.as_object() {
                // Old format has "alias" field instead of "source"; it also
                // stored "source" as a nested object rather than a string.
                if obj.contains_key("alias")
                    || (obj.contains_key("source") && obj["source"].is_object())
                {
                    return Err(Error::Storage(format!(
                        "Incompatible cache format detected for source '{source}'.\n\n\
                         This cache was created with blz v0.4.x or earlier and is not compatible with v0.5.0+.\n\n\
                         To fix this, clear your cache:\n  \
                         blz clear --force\n\n\
                         Then re-add your sources."
                    )));
                }
            }
        }

        let data = serde_json::from_str(&json)
            .map_err(|e| Error::Storage(format!("Failed to parse llms.json: {e}")))?;
        Ok(data)
    }
407
    /// Saves source metadata for a source atomically.
    ///
    /// # Errors
    ///
    /// Returns an error if the alias is invalid, serialization fails, or any
    /// filesystem operation fails.
    pub fn save_source_metadata(&self, source: &str, metadata: &Source) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.metadata_path(source)?;
        let json = serde_json::to_string_pretty(metadata)
            .map_err(|e| Error::Storage(format!("Failed to serialize metadata: {e}")))?;

        // Write to a temp file first to ensure atomicity — readers either see
        // the old metadata or the complete new metadata, never a partial file.
        let tmp_path = path.with_extension("json.tmp");
        fs::write(&tmp_path, &json)
            .map_err(|e| Error::Storage(format!("Failed to write temp metadata: {e}")))?;

        // Atomically rename temp file to final path (Windows cannot rename
        // over an existing file, so remove the destination there first).
        #[cfg(target_os = "windows")]
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove existing metadata: {e}")))?;
        }
        fs::rename(&tmp_path, &path)
            .map_err(|e| Error::Storage(format!("Failed to persist metadata: {e}")))?;

        debug!("Saved metadata for {}", source);
        Ok(())
    }
432
433    /// Save anchors remap JSON for a source
434    pub fn save_anchors_map(&self, source: &str, map: &crate::AnchorsMap) -> Result<()> {
435        self.ensure_tool_dir(source)?;
436        let path = self.anchors_map_path(source)?;
437        let json = serde_json::to_string_pretty(map)
438            .map_err(|e| Error::Storage(format!("Failed to serialize anchors map: {e}")))?;
439        fs::write(&path, json)
440            .map_err(|e| Error::Storage(format!("Failed to write anchors map: {e}")))?;
441        Ok(())
442    }
443
444    /// Loads source metadata for a source if it exists
445    pub fn load_source_metadata(&self, source: &str) -> Result<Option<Source>> {
446        let path = self.metadata_path(source)?;
447        if !path.exists() {
448            return Ok(None);
449        }
450        let json = fs::read_to_string(&path)
451            .map_err(|e| Error::Storage(format!("Failed to read metadata: {e}")))?;
452        let metadata = serde_json::from_str(&json)
453            .map_err(|e| Error::Storage(format!("Failed to parse metadata: {e}")))?;
454        Ok(Some(metadata))
455    }
456
457    /// Checks if a source exists in storage
458    #[must_use]
459    pub fn exists(&self, source: &str) -> bool {
460        self.llms_json_path(source)
461            .map(|path| path.exists())
462            .unwrap_or(false)
463    }
464
465    /// Lists all cached sources
466    #[must_use]
467    pub fn list_sources(&self) -> Vec<String> {
468        let mut sources = Vec::new();
469        let sources_dir = self.root_dir.join("sources");
470
471        if let Ok(entries) = fs::read_dir(&sources_dir) {
472            for entry in entries.flatten() {
473                if entry.path().is_dir() {
474                    if let Some(name) = entry.file_name().to_str() {
475                        if !name.starts_with('.') && self.exists(name) {
476                            sources.push(name.to_string());
477                        }
478                    }
479                }
480            }
481        }
482
483        sources.sort();
484        sources
485    }
486
    /// Clears the entire cache directory, removing all sources and their data.
    ///
    /// This is a destructive operation that cannot be undone. Use with caution.
    /// The root directory itself is recreated empty afterwards so the storage
    /// instance remains usable. Note: the config directory (descriptors) is
    /// not touched.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache directory cannot be removed or recreated.
    pub fn clear_cache(&self) -> Result<()> {
        // Remove the entire root directory
        if self.root_dir.exists() {
            fs::remove_dir_all(&self.root_dir)
                .map_err(|e| Error::Storage(format!("Failed to remove cache directory: {e}")))?;
        }

        // Recreate empty root directory
        fs::create_dir_all(&self.root_dir)
            .map_err(|e| Error::Storage(format!("Failed to recreate cache directory: {e}")))?;

        Ok(())
    }
507
508    /// Archives the current version of a source
509    pub fn archive(&self, source: &str) -> Result<()> {
510        let archive_dir = self.archive_dir(source)?;
511        fs::create_dir_all(&archive_dir)
512            .map_err(|e| Error::Storage(format!("Failed to create archive directory: {e}")))?;
513
514        // Include seconds for uniqueness and clearer chronology
515        let timestamp = Utc::now().format("%Y-%m-%dT%H-%M-%SZ");
516
517        // Archive all llms*.json and llms*.txt files
518        let dir = self.tool_dir(source)?;
519        if dir.exists() {
520            for entry in fs::read_dir(&dir)
521                .map_err(|e| Error::Storage(format!("Failed to read dir for archive: {e}")))?
522            {
523                let entry =
524                    entry.map_err(|e| Error::Storage(format!("Failed to read entry: {e}")))?;
525                let path = entry.path();
526                if !path.is_file() {
527                    continue;
528                }
529                let name = entry.file_name();
530                let name_str = name.to_string_lossy().to_lowercase();
531                // Archive only llms*.json / llms*.txt (skip metadata/anchors)
532                let is_json = std::path::Path::new(&name_str)
533                    .extension()
534                    .is_some_and(|ext| ext.eq_ignore_ascii_case("json"));
535                let is_txt = std::path::Path::new(&name_str)
536                    .extension()
537                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"));
538                let is_llms_artifact = (is_json || is_txt) && name_str.starts_with("llms");
539                if is_llms_artifact {
540                    let archive_path =
541                        archive_dir.join(format!("{timestamp}-{}", name.to_string_lossy()));
542                    fs::copy(&path, &archive_path).map_err(|e| {
543                        Error::Storage(format!("Failed to archive {}: {e}", path.display()))
544                    })?;
545                }
546            }
547        }
548
549        info!("Archived {} at {}", source, timestamp);
550        Ok(())
551    }
552
    /// Check for the pre-XDG cache directory and migrate its contents to
    /// `new_root` if appropriate.
    ///
    /// Best-effort: every failure path logs (via `warn!`/`info!`) instead of
    /// returning an error, so a broken migration never blocks startup.
    /// Migration is attempted only when the old location actually contains
    /// source data AND the new location is empty or absent; otherwise a
    /// warning tells the user to migrate manually.
    fn check_and_migrate_old_cache(new_root: &Path) {
        // Try to find the old cache directory (the legacy ProjectDirs slug).
        let old_project_dirs = ProjectDirs::from("dev", "outfitter", "cache");

        if let Some(old_dirs) = old_project_dirs {
            let old_root = old_dirs.data_dir();

            // Check if old directory exists and has content
            if old_root.exists() && old_root.is_dir() {
                // Check if there's actually content to migrate: any child
                // directory holding llms.json, llms.txt, or metadata.json.
                let has_content = fs::read_dir(old_root)
                    .map(|entries| {
                        entries.filter_map(std::result::Result::ok).any(|entry| {
                            let path = entry.path();
                            if !path.is_dir() {
                                return false;
                            }
                            let has_llms_json = path.join("llms.json").exists();
                            let has_llms_txt = path.join("llms.txt").exists();
                            let has_metadata = path.join("metadata.json").exists();
                            has_llms_json || has_llms_txt || has_metadata
                        })
                    })
                    .unwrap_or(false);
                if has_content {
                    // Check if new directory already exists with content
                    // (any entry at all counts — we never overwrite).
                    if new_root.exists()
                        && fs::read_dir(new_root)
                            .map(|mut e| e.next().is_some())
                            .unwrap_or(false)
                    {
                        // New directory already has content, just log a warning
                        warn!(
                            "Found old cache at {} but new cache at {} already exists. \
                             Manual migration may be needed if you want to preserve old data.",
                            old_root.display(),
                            new_root.display()
                        );
                    } else {
                        // Attempt migration
                        info!(
                            "Migrating cache from old location {} to new location {}",
                            old_root.display(),
                            new_root.display()
                        );

                        if let Err(e) = Self::migrate_directory(old_root, new_root) {
                            // Log warning but don't fail - let the user continue with fresh cache
                            warn!(
                                "Could not automatically migrate cache: {}. \
                                 Starting with fresh cache at {}. \
                                 To manually migrate, copy contents from {} to {}",
                                e,
                                new_root.display(),
                                old_root.display(),
                                new_root.display()
                            );
                        } else {
                            info!("Successfully migrated cache to new location");
                        }
                    }
                }
            }
        }
    }
619
620    /// Recursively copy directory contents from old to new location
621    fn migrate_directory(from: &Path, to: &Path) -> Result<()> {
622        // Create target directory if it doesn't exist
623        fs::create_dir_all(to)
624            .map_err(|e| Error::Storage(format!("Failed to create migration target: {e}")))?;
625
626        // Copy all entries
627        for entry in fs::read_dir(from)
628            .map_err(|e| Error::Storage(format!("Failed to read migration source: {e}")))?
629        {
630            let entry = entry
631                .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
632            let path = entry.path();
633            let file_name = entry.file_name();
634            let target_path = to.join(&file_name);
635
636            if path.is_dir() {
637                // Recursively copy subdirectory
638                Self::migrate_directory(&path, &target_path)?;
639            } else {
640                // Copy file
641                fs::copy(&path, &target_path).map_err(|e| {
642                    Error::Storage(format!("Failed to copy file during migration: {e}"))
643                })?;
644            }
645        }
646
647        Ok(())
648    }
649}
650
651// Note: Default is not implemented as Storage::new() can fail.
652// Use Storage::new() directly and handle the Result.
653
654#[cfg(test)]
655#[allow(clippy::unwrap_used)]
656mod tests {
657    use super::*;
658    use crate::types::{FileInfo, LineIndex, Source, SourceVariant, TocEntry};
659    use std::fs;
660    use tempfile::TempDir;
661
    // Builds a Storage rooted in a fresh temp dir; the TempDir must be kept
    // alive by the caller or the directory is deleted.
    fn create_test_storage() -> (Storage, TempDir) {
        let temp_dir = TempDir::new().expect("Failed to create temp directory");
        let storage = Storage::with_root(temp_dir.path().to_path_buf())
            .expect("Failed to create test storage");
        (storage, temp_dir)
    }

    // Builds a minimal but fully populated LlmsJson fixture for `source_name`,
    // with one TOC entry, one file record, and placeholder hashes/URLs.
    fn create_test_llms_json(source_name: &str) -> LlmsJson {
        LlmsJson {
            source: source_name.to_string(),
            metadata: Source {
                url: format!("https://example.com/{source_name}/llms.txt"),
                etag: Some("abc123".to_string()),
                last_modified: None,
                fetched_at: Utc::now(),
                sha256: "deadbeef".to_string(),
                variant: SourceVariant::Llms,
                aliases: Vec::new(),
                tags: Vec::new(),
                description: None,
                category: None,
                npm_aliases: Vec::new(),
                github_aliases: Vec::new(),
                origin: crate::types::SourceOrigin {
                    manifest: None,
                    source_type: Some(crate::types::SourceType::Remote {
                        url: format!("https://example.com/{source_name}/llms.txt"),
                    }),
                },
                filter_non_english: None,
            },
            toc: vec![TocEntry {
                heading_path: vec!["Getting Started".to_string()],
                heading_path_display: Some(vec!["Getting Started".to_string()]),
                heading_path_normalized: Some(vec!["getting started".to_string()]),
                lines: "1-50".to_string(),
                anchor: None,
                children: vec![],
            }],
            files: vec![FileInfo {
                path: "llms.txt".to_string(),
                sha256: "deadbeef".to_string(),
            }],
            line_index: LineIndex {
                total_lines: 100,
                byte_offsets: false,
            },
            diagnostics: vec![],
            parse_meta: None,
            filter_stats: None,
        }
    }
714
    // with_root should succeed and create the root directory on disk.
    #[test]
    fn test_storage_creation_with_root() {
        let temp_dir = TempDir::new().expect("Failed to create temp directory");
        let storage = Storage::with_root(temp_dir.path().to_path_buf());

        assert!(storage.is_ok());
        let _storage = storage.unwrap();

        // Verify root directory was created
        assert!(temp_dir.path().exists());
    }

    // All derived paths for an alias should live under sources/<alias>/ with
    // the expected fixed filenames.
    #[test]
    fn test_tool_directory_paths() {
        let (storage, _temp_dir) = create_test_storage();

        let tool_dir = storage.tool_dir("react").expect("Should get tool dir");
        let llms_txt_path = storage
            .llms_txt_path("react")
            .expect("Should get llms.txt path");
        let llms_json_path = storage
            .llms_json_path("react")
            .expect("Should get llms.json path");
        let index_dir = storage.index_dir("react").expect("Should get index dir");
        let archive_dir = storage
            .archive_dir("react")
            .expect("Should get archive dir");

        assert!(tool_dir.ends_with("react"));
        assert!(llms_txt_path.ends_with("react/llms.txt"));
        assert!(llms_json_path.ends_with("react/llms.json"));
        assert!(index_dir.ends_with("react/.index"));
        assert!(archive_dir.ends_with("react/.archive"));
    }

    // Alias validation should reject traversal/special names and accept the
    // documented [A-Za-z0-9_-] charset.
    #[test]
    fn test_invalid_alias_validation() {
        let (storage, _temp_dir) = create_test_storage();

        // Test path traversal attempts
        assert!(storage.tool_dir("../etc").is_err());
        assert!(storage.tool_dir("../../passwd").is_err());
        assert!(storage.tool_dir("test/../../../etc").is_err());

        // Test invalid characters
        assert!(storage.tool_dir(".hidden").is_err());
        assert!(storage.tool_dir("test\0null").is_err());
        assert!(storage.tool_dir("test/slash").is_err());
        assert!(storage.tool_dir("test\\backslash").is_err());

        // Test empty alias
        assert!(storage.tool_dir("").is_err());

        // Test valid aliases
        assert!(storage.tool_dir("react").is_ok());
        assert!(storage.tool_dir("my-tool").is_ok());
        assert!(storage.tool_dir("tool_123").is_ok());
    }

    // ensure_tool_dir should create the directory and be safe to call twice.
    #[test]
    fn test_ensure_tool_directory() {
        let (storage, _temp_dir) = create_test_storage();

        let tool_dir = storage
            .ensure_tool_dir("react")
            .expect("Should create tool dir");
        assert!(tool_dir.exists());

        // Should be idempotent
        let tool_dir2 = storage
            .ensure_tool_dir("react")
            .expect("Should not fail on existing dir");
        assert_eq!(tool_dir, tool_dir2);
    }

    // save_llms_txt then load_llms_txt should round-trip content unchanged.
    #[test]
    fn test_save_and_load_llms_txt() {
        let (storage, _temp_dir) = create_test_storage();

        let content = "# React Documentation\n\nThis is the React documentation...";

        // Save content
        storage
            .save_llms_txt("react", content)
            .expect("Should save llms.txt");

        // Verify file exists
        assert!(
            storage
                .llms_txt_path("react")
                .expect("Should get path")
                .exists()
        );

        // Load content
        let loaded_content = storage
            .load_llms_txt("react")
            .expect("Should load llms.txt");
        assert_eq!(content, loaded_content);
    }
815
816    #[test]
817    fn test_save_and_load_llms_json() {
818        let (storage, _temp_dir) = create_test_storage();
819
820        let llms_json = create_test_llms_json("react");
821
822        // Save JSON
823        storage
824            .save_llms_json("react", &llms_json)
825            .expect("Should save llms.json");
826
827        // Verify file exists
828        assert!(
829            storage
830                .llms_json_path("react")
831                .expect("Should get path")
832                .exists()
833        );
834
835        // Load JSON
836        let loaded_json = storage
837            .load_llms_json("react")
838            .expect("Should load llms.json");
839        assert_eq!(llms_json.source, loaded_json.source);
840        assert_eq!(llms_json.metadata.url, loaded_json.metadata.url);
841        assert_eq!(
842            llms_json.line_index.total_lines,
843            loaded_json.line_index.total_lines
844        );
845    }
846
847    #[test]
848    fn test_source_exists() {
849        let (storage, _temp_dir) = create_test_storage();
850
851        // Initially should not exist
852        assert!(!storage.exists("react"));
853
854        // After saving llms.json, should exist
855        let llms_json = create_test_llms_json("react");
856        storage
857            .save_llms_json("react", &llms_json)
858            .expect("Should save");
859
860        assert!(storage.exists("react"));
861    }
862
863    #[test]
864    fn test_list_sources_empty() {
865        let (storage, _temp_dir) = create_test_storage();
866
867        let sources = storage.list_sources();
868        assert!(sources.is_empty());
869    }
870
871    #[test]
872    fn test_list_sources_with_data() {
873        let (storage, _temp_dir) = create_test_storage();
874
875        // Add multiple sources
876        let aliases = ["react", "nextjs", "rust"];
877        for &alias in &aliases {
878            let llms_json = create_test_llms_json(alias);
879            storage
880                .save_llms_json(alias, &llms_json)
881                .expect("Should save");
882        }
883
884        let sources = storage.list_sources();
885        assert_eq!(sources.len(), 3);
886
887        // Should be sorted
888        assert_eq!(sources, vec!["nextjs", "react", "rust"]);
889    }
890
891    #[test]
892    fn test_list_sources_ignores_hidden_dirs() {
893        let (storage, temp_dir) = create_test_storage();
894
895        // Create a hidden directory
896        let hidden_dir = temp_dir.path().join(".hidden");
897        fs::create_dir(&hidden_dir).expect("Should create hidden dir");
898
899        // Create a regular source
900        let llms_json = create_test_llms_json("react");
901        storage
902            .save_llms_json("react", &llms_json)
903            .expect("Should save");
904
905        let sources = storage.list_sources();
906        assert_eq!(sources.len(), 1);
907        assert_eq!(sources[0], "react");
908    }
909
910    #[test]
911    fn test_list_sources_requires_llms_json() {
912        let (storage, _temp_dir) = create_test_storage();
913
914        // Create tool directory without llms.json
915        storage
916            .ensure_tool_dir("incomplete")
917            .expect("Should create dir");
918
919        // Save only llms.txt (no llms.json)
920        storage
921            .save_llms_txt("incomplete", "# Test content")
922            .expect("Should save txt");
923
924        // Create another source with complete data
925        let llms_json = create_test_llms_json("complete");
926        storage
927            .save_llms_json("complete", &llms_json)
928            .expect("Should save json");
929
930        let sources = storage.list_sources();
931        assert_eq!(sources.len(), 1);
932        assert_eq!(sources[0], "complete");
933    }
934
935    #[test]
936    fn test_archive_functionality() {
937        let (storage, _temp_dir) = create_test_storage();
938
939        // Create source data
940        let content = "# Test content";
941        let llms_json = create_test_llms_json("test");
942
943        storage
944            .save_llms_txt("test", content)
945            .expect("Should save txt");
946        storage
947            .save_llms_json("test", &llms_json)
948            .expect("Should save json");
949
950        // Archive the source
951        storage.archive("test").expect("Should archive");
952
953        // Verify archive directory exists
954        let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
955        assert!(archive_dir.exists());
956
957        // Verify archived files exist (names contain timestamp)
958        let archive_entries: Vec<_> = fs::read_dir(&archive_dir)
959            .expect("Should read archive dir")
960            .collect::<std::result::Result<Vec<_>, std::io::Error>>()
961            .expect("Should collect entries");
962
963        assert_eq!(archive_entries.len(), 2); // llms.txt and llms.json
964
965        // Verify archived files have correct names
966        let mut has_txt = false;
967        let mut has_json = false;
968        for entry in archive_entries {
969            let name = entry.file_name().to_string_lossy().to_string();
970            if name.contains("llms.txt") {
971                has_txt = true;
972            }
973            if name.contains("llms.json") {
974                has_json = true;
975            }
976        }
977
978        assert!(has_txt, "Should have archived llms.txt");
979        assert!(has_json, "Should have archived llms.json");
980    }
981
982    #[test]
983    fn test_archive_missing_files() {
984        let (storage, _temp_dir) = create_test_storage();
985
986        // Archive non-existent source - should not fail
987        let result = storage.archive("nonexistent");
988        assert!(result.is_ok());
989
990        // Archive directory should still be created
991        let archive_dir = storage
992            .archive_dir("nonexistent")
993            .expect("Should get archive dir");
994        assert!(archive_dir.exists());
995    }
996
997    #[test]
998    fn test_load_missing_files_returns_error() {
999        let (storage, _temp_dir) = create_test_storage();
1000
1001        let result = storage.load_llms_txt("nonexistent");
1002        assert!(result.is_err());
1003
1004        let result = storage.load_llms_json("nonexistent");
1005        assert!(result.is_err());
1006    }
1007
1008    #[test]
1009    fn test_json_serialization_roundtrip() {
1010        let (storage, _temp_dir) = create_test_storage();
1011
1012        let original = create_test_llms_json("test");
1013
1014        // Save and load
1015        storage
1016            .save_llms_json("test", &original)
1017            .expect("Should save");
1018        let loaded = storage.load_llms_json("test").expect("Should load");
1019
1020        // Verify all fields are preserved
1021        assert_eq!(original.source, loaded.source);
1022        assert_eq!(original.metadata.url, loaded.metadata.url);
1023        assert_eq!(original.metadata.sha256, loaded.metadata.sha256);
1024        assert_eq!(original.toc.len(), loaded.toc.len());
1025        assert_eq!(original.files.len(), loaded.files.len());
1026        assert_eq!(
1027            original.line_index.total_lines,
1028            loaded.line_index.total_lines
1029        );
1030        assert_eq!(original.diagnostics.len(), loaded.diagnostics.len());
1031    }
1032}