blz_core/storage.rs

use crate::{Error, LlmsJson, Result, Source, SourceDescriptor, profile};
use chrono::Utc;
use directories::{BaseDirs, ProjectDirs};
use std::fs;
use std::path::{Path, PathBuf};
use tracing::{debug, info, warn};

/// Maximum allowed alias length to match CLI constraints
const MAX_ALIAS_LEN: usize = 64;

/// Local filesystem storage for cached llms.txt documentation
pub struct Storage {
    root_dir: PathBuf,
    config_dir: PathBuf,
}

impl Storage {
    fn sanitize_variant_file_name(name: &str) -> String {
        // Only allow a conservative set of filename characters to avoid
        // accidentally writing outside the tool directory or producing
        // surprising paths. Anything else becomes an underscore so that the
        // resulting filename stays predictable and safe to use across
        // platforms.
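        // For example (illustrative): "llms-full.txt" passes through
        // unchanged, "weird name?.txt" becomes "weird_name_.txt", and an
        // empty name falls back to "llms.txt" below.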
        let mut sanitized: String = name
            .chars()
            .map(|c| {
                if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
                    c
                } else {
                    '_'
                }
            })
            .collect();

        // Collapse any ".." segments that could be introduced either by the
        // caller or by the substitution above. This keeps the path rooted at
        // the alias directory even if callers pass traversal attempts.
        while sanitized.contains("..") {
            sanitized = sanitized.replace("..", "_");
        }

        if sanitized.is_empty() {
            "llms.txt".to_string()
        } else {
            sanitized
        }
    }

    // Storage uses consistent filenames regardless of source URL:
    // - llms.txt for content (even if fetched from llms-full.txt)
    // - llms.json for parsed data
    // - metadata.json for source metadata
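    //
    // Illustrative on-disk layout under the data root (see the path helpers
    // below):
    //
    //   <root>/sources/<alias>/llms.txt       raw fetched content
    //   <root>/sources/<alias>/llms.json      parsed document (LlmsJson)
    //   <root>/sources/<alias>/metadata.json  source metadata (Source)
    //   <root>/sources/<alias>/anchors.json   anchors remap
    //   <root>/sources/<alias>/.index/        search index
    //   <root>/sources/<alias>/.archive/      timestamped snapshots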

    /// Creates a new storage instance with the default root directory
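    ///
    /// Resolution order: `BLZ_DATA_DIR` (explicit override), then
    /// `XDG_DATA_HOME` joined with the app directory slug, then the
    /// `~/.<dot dir>` fallback.
    ///
    /// ```ignore
    /// // Minimal usage sketch; assumes `Storage` is re-exported at the crate root.
    /// let storage = blz_core::Storage::new().expect("storage init");
    /// println!("data root: {}", storage.root_dir().display());
    /// ```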
    pub fn new() -> Result<Self> {
        // Test/dev override: allow BLZ_DATA_DIR to set the root directory explicitly
        if let Ok(dir) = std::env::var("BLZ_DATA_DIR") {
            let root = PathBuf::from(dir);
            let config_dir = Self::default_config_dir()?;
            return Self::with_paths(root, config_dir);
        }

        // Use XDG_DATA_HOME if explicitly set
        let root_dir = if let Ok(xdg) = std::env::var("XDG_DATA_HOME") {
            let trimmed = xdg.trim();
            if trimmed.is_empty() {
                Self::fallback_data_dir()?
            } else {
                PathBuf::from(trimmed).join(profile::app_dir_slug())
            }
        } else {
            Self::fallback_data_dir()?
        };

        // Check for migration from old cache directory
        Self::check_and_migrate_old_cache(&root_dir);

        let config_dir = Self::default_config_dir()?;
        Self::with_paths(root_dir, config_dir)
    }

    /// Fallback data directory when `XDG_DATA_HOME` is not set
    fn fallback_data_dir() -> Result<PathBuf> {
        // Use ~/.blz/ for data (same location as config for non-XDG systems)
        let home = directories::BaseDirs::new()
            .ok_or_else(|| Error::Storage("Failed to determine home directory".into()))?;
        Ok(home.home_dir().join(profile::dot_dir_slug()))
    }

    /// Determine the default configuration directory honoring overrides
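    ///
    /// Precedence: `BLZ_CONFIG_DIR`, then `BLZ_GLOBAL_CONFIG_DIR`, then
    /// `XDG_CONFIG_HOME` joined with the app directory slug, then the
    /// `~/.<dot dir>` fallback.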
    fn default_config_dir() -> Result<PathBuf> {
        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
            let trimmed = dir.trim();
            if !trimmed.is_empty() {
                return Ok(PathBuf::from(trimmed));
            }
        }

        if let Ok(dir) = std::env::var("BLZ_GLOBAL_CONFIG_DIR") {
            let trimmed = dir.trim();
            if !trimmed.is_empty() {
                return Ok(PathBuf::from(trimmed));
            }
        }

        if let Ok(xdg) = std::env::var("XDG_CONFIG_HOME") {
            let trimmed = xdg.trim();
            if !trimmed.is_empty() {
                return Ok(PathBuf::from(trimmed).join(profile::app_dir_slug()));
            }
        }

        if let Some(base_dirs) = BaseDirs::new() {
            return Ok(base_dirs.home_dir().join(profile::dot_dir_slug()));
        }

        Err(Error::Storage(
            "Failed to determine configuration directory".into(),
        ))
    }

    /// Creates a new storage instance with a custom root directory
    pub fn with_root(root_dir: PathBuf) -> Result<Self> {
        let config_dir = root_dir.join("config");
        Self::with_paths(root_dir, config_dir)
    }

    /// Creates a new storage instance with explicit data and config directories
    pub fn with_paths(root_dir: PathBuf, config_dir: PathBuf) -> Result<Self> {
        fs::create_dir_all(&root_dir)
            .map_err(|e| Error::Storage(format!("Failed to create root directory: {e}")))?;
        fs::create_dir_all(&config_dir)
            .map_err(|e| Error::Storage(format!("Failed to create config directory: {e}")))?;

        Ok(Self {
            root_dir,
            config_dir,
        })
    }

    /// Returns the root data directory path
    #[must_use]
    pub fn root_dir(&self) -> &Path {
        &self.root_dir
    }

    /// Returns the root configuration directory path used for descriptors
    #[must_use]
    pub fn config_dir(&self) -> &Path {
        &self.config_dir
    }

    fn descriptors_dir(&self) -> PathBuf {
        self.config_dir.join("sources")
    }

    /// Returns the path to the descriptor TOML for a source
    pub fn descriptor_path(&self, alias: &str) -> Result<PathBuf> {
        Self::validate_alias(alias)?;
        Ok(self.descriptors_dir().join(format!("{alias}.toml")))
    }

    /// Persist a descriptor to disk, creating parent directories if necessary
    pub fn save_descriptor(&self, descriptor: &SourceDescriptor) -> Result<()> {
        let path = self.descriptor_path(&descriptor.alias)?;
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)
                .map_err(|e| Error::Storage(format!("Failed to create descriptor dir: {e}")))?;
        }

        let toml = toml::to_string_pretty(descriptor)
            .map_err(|e| Error::Storage(format!("Failed to serialize descriptor: {e}")))?;
        fs::write(&path, toml)
            .map_err(|e| Error::Storage(format!("Failed to write descriptor: {e}")))?;
        Ok(())
    }

    /// Load a descriptor if it exists
    pub fn load_descriptor(&self, alias: &str) -> Result<Option<SourceDescriptor>> {
        let path = self.descriptor_path(alias)?;
        if !path.exists() {
            return Ok(None);
        }

        let contents = fs::read_to_string(&path)
            .map_err(|e| Error::Storage(format!("Failed to read descriptor: {e}")))?;
        let descriptor = toml::from_str::<SourceDescriptor>(&contents)
            .map_err(|e| Error::Storage(format!("Failed to parse descriptor: {e}")))?;
        Ok(Some(descriptor))
    }

    /// Remove descriptor file for an alias if present
    pub fn remove_descriptor(&self, alias: &str) -> Result<()> {
        let path = self.descriptor_path(alias)?;
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove descriptor: {e}")))?;
        }
        Ok(())
    }

    /// Returns the directory path for a given alias
    pub fn tool_dir(&self, source: &str) -> Result<PathBuf> {
        // Validate alias to prevent directory traversal attacks
        Self::validate_alias(source)?;
        Ok(self.root_dir.join("sources").join(source))
    }

    /// Resolve the on-disk path for a specific flavored content file.
    fn variant_file_path(&self, source: &str, file_name: &str) -> Result<PathBuf> {
        let sanitized = Self::sanitize_variant_file_name(file_name);
        Ok(self.tool_dir(source)?.join(sanitized))
    }

    /// Ensures the directory for an alias exists and returns its path
    pub fn ensure_tool_dir(&self, source: &str) -> Result<PathBuf> {
        let dir = self.tool_dir(source)?;
        fs::create_dir_all(&dir)
            .map_err(|e| Error::Storage(format!("Failed to create tool directory: {e}")))?;
        Ok(dir)
    }

    /// Validate that an alias is safe to use as a directory name
    ///
    /// This validation is unified with CLI constraints to prevent inconsistencies
    /// between what the CLI accepts and what storage can handle.
    fn validate_alias(alias: &str) -> Result<()> {
        // Check for empty alias
        if alias.is_empty() {
            return Err(Error::Storage("Alias cannot be empty".into()));
        }

        // Disallow leading hyphen to avoid CLI parsing ambiguities
        if alias.starts_with('-') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': cannot start with '-'"
            )));
        }

        // Check for path traversal attempts
        if alias.contains("..") || alias.contains('/') || alias.contains('\\') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': contains path traversal characters"
            )));
        }

        // Check for special filesystem characters
        if alias.starts_with('.') || alias.contains('\0') {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': contains invalid filesystem characters"
            )));
        }

        // Check for reserved names on Windows
        #[cfg(target_os = "windows")]
        {
            const RESERVED_NAMES: &[&str] = &[
                "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
                "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
                "LPT9",
            ];

            let upper_alias = alias.to_uppercase();
            if RESERVED_NAMES.contains(&upper_alias.as_str()) {
                return Err(Error::Storage(format!(
                    "Invalid alias '{alias}': reserved name on Windows"
                )));
            }
        }

        // Check length (keep consistent with CLI policy)
        if alias.len() > MAX_ALIAS_LEN {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': exceeds maximum length of {MAX_ALIAS_LEN} characters"
            )));
        }

        // Only allow ASCII alphanumeric, dash, underscore
        if !alias
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
        {
            return Err(Error::Storage(format!(
                "Invalid alias '{alias}': only [A-Za-z0-9_-] are allowed"
            )));
        }

        Ok(())
    }

    /// Returns the path to the llms.txt file for a source
    pub fn llms_txt_path(&self, source: &str) -> Result<PathBuf> {
        self.variant_file_path(source, "llms.txt")
    }

    /// Returns the path to the llms.json file for a source
    pub fn llms_json_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("llms.json"))
    }

    /// Returns the path to the search index directory for a source
    pub fn index_dir(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join(".index"))
    }

    /// Returns the path to the archive directory for a source
    pub fn archive_dir(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join(".archive"))
    }

    /// Returns the path to the metadata file for a source
    pub fn metadata_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("metadata.json"))
    }

    /// Returns the path to the anchors mapping file for a source
    pub fn anchors_map_path(&self, source: &str) -> Result<PathBuf> {
        Ok(self.tool_dir(source)?.join("anchors.json"))
    }

    /// Saves the llms.txt content for a source
    pub fn save_llms_txt(&self, source: &str, content: &str) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.llms_txt_path(source)?;

        let tmp_path = path.with_extension("tmp");
        fs::write(&tmp_path, content)
            .map_err(|e| Error::Storage(format!("Failed to write llms.txt: {e}")))?;

        #[cfg(target_os = "windows")]
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.txt: {e}")))?;
        }

        fs::rename(&tmp_path, &path)
            .map_err(|e| Error::Storage(format!("Failed to commit llms.txt: {e}")))?;

        debug!("Saved llms.txt for {}", source);
        Ok(())
    }

    /// Loads the llms.txt content for a source
    pub fn load_llms_txt(&self, source: &str) -> Result<String> {
        let path = self.llms_txt_path(source)?;
        fs::read_to_string(&path)
            .map_err(|e| Error::Storage(format!("Failed to read llms.txt: {e}")))
    }

    /// Saves the parsed llms.json data for a source
    pub fn save_llms_json(&self, source: &str, data: &LlmsJson) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.llms_json_path(source)?;
        let json = serde_json::to_string_pretty(data)
            .map_err(|e| Error::Storage(format!("Failed to serialize JSON: {e}")))?;

        let tmp_path = path.with_extension("json.tmp");
        fs::write(&tmp_path, json)
            .map_err(|e| Error::Storage(format!("Failed to write llms.json: {e}")))?;

        #[cfg(target_os = "windows")]
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.json: {e}")))?;
        }
        fs::rename(&tmp_path, &path)
            .map_err(|e| Error::Storage(format!("Failed to commit llms.json: {e}")))?;

        debug!("Saved llms.json for {}", source);
        Ok(())
    }

    /// Loads the parsed llms.json data for a source
    pub fn load_llms_json(&self, source: &str) -> Result<LlmsJson> {
        let path = self.llms_json_path(source)?;
        if !path.exists() {
            return Err(Error::Storage(format!(
                "llms.json missing for source '{source}'"
            )));
        }
        let json = fs::read_to_string(&path)
            .map_err(|e| Error::Storage(format!("Failed to read llms.json: {e}")))?;

        // Try to detect old v0.4.x format
        if let Ok(raw_value) = serde_json::from_str::<serde_json::Value>(&json) {
            if let Some(obj) = raw_value.as_object() {
                // Old format has "alias" field instead of "source"
                if obj.contains_key("alias")
                    || (obj.contains_key("source") && obj["source"].is_object())
                {
                    return Err(Error::Storage(format!(
                        "Incompatible cache format detected for source '{source}'.\n\n\
                         This cache was created with blz v0.4.x or earlier and is not compatible with v0.5.0+.\n\n\
                         To fix this, clear your cache:\n  \
                         blz clear --force\n\n\
                         Then re-add your sources."
                    )));
                }
            }
        }

        let data = serde_json::from_str(&json)
            .map_err(|e| Error::Storage(format!("Failed to parse llms.json: {e}")))?;
        Ok(data)
    }

    /// Saves source metadata for a source
    pub fn save_source_metadata(&self, source: &str, metadata: &Source) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.metadata_path(source)?;
        let json = serde_json::to_string_pretty(metadata)
            .map_err(|e| Error::Storage(format!("Failed to serialize metadata: {e}")))?;

        // Write to a temp file first to ensure atomicity
        let tmp_path = path.with_extension("json.tmp");
        fs::write(&tmp_path, &json)
            .map_err(|e| Error::Storage(format!("Failed to write temp metadata: {e}")))?;

        // Atomically rename temp file to final path (handle Windows overwrite)
        #[cfg(target_os = "windows")]
        if path.exists() {
            fs::remove_file(&path)
                .map_err(|e| Error::Storage(format!("Failed to remove existing metadata: {e}")))?;
        }
        fs::rename(&tmp_path, &path)
            .map_err(|e| Error::Storage(format!("Failed to persist metadata: {e}")))?;

        debug!("Saved metadata for {}", source);
        Ok(())
    }

    /// Save anchors remap JSON for a source
    pub fn save_anchors_map(&self, source: &str, map: &crate::AnchorsMap) -> Result<()> {
        self.ensure_tool_dir(source)?;
        let path = self.anchors_map_path(source)?;
        let json = serde_json::to_string_pretty(map)
            .map_err(|e| Error::Storage(format!("Failed to serialize anchors map: {e}")))?;
        fs::write(&path, json)
            .map_err(|e| Error::Storage(format!("Failed to write anchors map: {e}")))?;
        Ok(())
    }

    /// Loads source metadata for a source if it exists
    pub fn load_source_metadata(&self, source: &str) -> Result<Option<Source>> {
        let path = self.metadata_path(source)?;
        if !path.exists() {
            return Ok(None);
        }
        let json = fs::read_to_string(&path)
            .map_err(|e| Error::Storage(format!("Failed to read metadata: {e}")))?;
        let metadata = serde_json::from_str(&json)
            .map_err(|e| Error::Storage(format!("Failed to parse metadata: {e}")))?;
        Ok(Some(metadata))
    }

    /// Checks if a source exists in storage
    #[must_use]
    pub fn exists(&self, source: &str) -> bool {
        self.llms_json_path(source)
            .map(|path| path.exists())
            .unwrap_or(false)
    }

    /// Lists all cached sources
    #[must_use]
    pub fn list_sources(&self) -> Vec<String> {
        let mut sources = Vec::new();
        let sources_dir = self.root_dir.join("sources");

        if let Ok(entries) = fs::read_dir(&sources_dir) {
            for entry in entries.flatten() {
                if entry.path().is_dir() {
                    if let Some(name) = entry.file_name().to_str() {
                        if !name.starts_with('.') && self.exists(name) {
                            sources.push(name.to_string());
                        }
                    }
                }
            }
        }

        sources.sort();
        sources
    }

    /// Clears the entire cache directory, removing all sources and their data.
    ///
    /// This is a destructive operation that cannot be undone. Use with caution.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache directory cannot be removed or recreated.
    pub fn clear_cache(&self) -> Result<()> {
        // Remove the entire root directory
        if self.root_dir.exists() {
            fs::remove_dir_all(&self.root_dir)
                .map_err(|e| Error::Storage(format!("Failed to remove cache directory: {e}")))?;
        }

        // Recreate empty root directory
        fs::create_dir_all(&self.root_dir)
            .map_err(|e| Error::Storage(format!("Failed to recreate cache directory: {e}")))?;

        Ok(())
    }

    /// Archives the current version of a source
    pub fn archive(&self, source: &str) -> Result<()> {
        let archive_dir = self.archive_dir(source)?;
        fs::create_dir_all(&archive_dir)
            .map_err(|e| Error::Storage(format!("Failed to create archive directory: {e}")))?;

        // Include seconds for uniqueness and clearer chronology
        let timestamp = Utc::now().format("%Y-%m-%dT%H-%M-%SZ");

        // Archive all llms*.json and llms*.txt files
        let dir = self.tool_dir(source)?;
        if dir.exists() {
            for entry in fs::read_dir(&dir)
                .map_err(|e| Error::Storage(format!("Failed to read dir for archive: {e}")))?
            {
                let entry =
                    entry.map_err(|e| Error::Storage(format!("Failed to read entry: {e}")))?;
                let path = entry.path();
                if !path.is_file() {
                    continue;
                }
                let name = entry.file_name();
                let name_str = name.to_string_lossy().to_lowercase();
                // Archive only llms*.json / llms*.txt (skip metadata/anchors)
                let is_json = std::path::Path::new(&name_str)
                    .extension()
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("json"));
                let is_txt = std::path::Path::new(&name_str)
                    .extension()
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"));
                let is_llms_artifact = (is_json || is_txt) && name_str.starts_with("llms");
                if is_llms_artifact {
                    let archive_path =
                        archive_dir.join(format!("{timestamp}-{}", name.to_string_lossy()));
                    fs::copy(&path, &archive_path).map_err(|e| {
                        Error::Storage(format!("Failed to archive {}: {e}", path.display()))
                    })?;
                }
            }
        }

        info!("Archived {} at {}", source, timestamp);
        Ok(())
    }

    /// Check for old cache directory and migrate if needed
    fn check_and_migrate_old_cache(new_root: &Path) {
        // Try to find the old cache directory
        let old_project_dirs = ProjectDirs::from("dev", "outfitter", "cache");

        if let Some(old_dirs) = old_project_dirs {
            let old_root = old_dirs.data_dir();

            // Check if old directory exists and has content
            if old_root.exists() && old_root.is_dir() {
                // Check if there's actually content to migrate (look for llms.json files)
                let has_content = fs::read_dir(old_root)
                    .map(|entries| {
                        entries.filter_map(std::result::Result::ok).any(|entry| {
                            let path = entry.path();
                            if !path.is_dir() {
                                return false;
                            }
                            let has_llms_json = path.join("llms.json").exists();
                            let has_llms_txt = path.join("llms.txt").exists();
                            let has_metadata = path.join("metadata.json").exists();
                            has_llms_json || has_llms_txt || has_metadata
                        })
                    })
                    .unwrap_or(false);
                if has_content {
                    // Check if new directory already exists with content
                    if new_root.exists()
                        && fs::read_dir(new_root)
                            .map(|mut e| e.next().is_some())
                            .unwrap_or(false)
                    {
                        // New directory already has content, just log a warning
                        warn!(
                            "Found old cache at {} but new cache at {} already exists. \
                             Manual migration may be needed if you want to preserve old data.",
                            old_root.display(),
                            new_root.display()
                        );
                    } else {
                        // Attempt migration
                        info!(
                            "Migrating cache from old location {} to new location {}",
                            old_root.display(),
                            new_root.display()
                        );

                        if let Err(e) = Self::migrate_directory(old_root, new_root) {
                            // Log warning but don't fail - let the user continue with fresh cache
                            warn!(
                                "Could not automatically migrate cache: {}. \
                                 Starting with fresh cache at {}. \
                                 To manually migrate, copy contents from {} to {}",
                                e,
                                new_root.display(),
                                old_root.display(),
                                new_root.display()
                            );
                        } else {
                            info!("Successfully migrated cache to new location");
                        }
                    }
                }
            }
        }
    }

    /// Recursively copy directory contents from old to new location
    fn migrate_directory(from: &Path, to: &Path) -> Result<()> {
        // Create target directory if it doesn't exist
        fs::create_dir_all(to)
            .map_err(|e| Error::Storage(format!("Failed to create migration target: {e}")))?;

        // Copy all entries
        for entry in fs::read_dir(from)
            .map_err(|e| Error::Storage(format!("Failed to read migration source: {e}")))?
        {
            let entry = entry
                .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
            let path = entry.path();
            let file_name = entry.file_name();
            let target_path = to.join(&file_name);

            if path.is_dir() {
                // Recursively copy subdirectory
                Self::migrate_directory(&path, &target_path)?;
            } else {
                // Copy file
                fs::copy(&path, &target_path).map_err(|e| {
                    Error::Storage(format!("Failed to copy file during migration: {e}"))
                })?;
            }
        }

        Ok(())
    }
}

// Note: Default is not implemented as Storage::new() can fail.
// Use Storage::new() directly and handle the Result.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::types::{FileInfo, LineIndex, Source, SourceVariant, TocEntry};
    use std::fs;
    use tempfile::TempDir;

    fn create_test_storage() -> (Storage, TempDir) {
        let temp_dir = TempDir::new().expect("Failed to create temp directory");
        let storage = Storage::with_root(temp_dir.path().to_path_buf())
            .expect("Failed to create test storage");
        (storage, temp_dir)
    }

    fn create_test_llms_json(source_name: &str) -> LlmsJson {
        LlmsJson {
            source: source_name.to_string(),
            metadata: Source {
                url: format!("https://example.com/{source_name}/llms.txt"),
                etag: Some("abc123".to_string()),
                last_modified: None,
                fetched_at: Utc::now(),
                sha256: "deadbeef".to_string(),
                variant: SourceVariant::Llms,
                aliases: Vec::new(),
                tags: Vec::new(),
                description: None,
                category: None,
                npm_aliases: Vec::new(),
                github_aliases: Vec::new(),
                origin: crate::types::SourceOrigin {
                    manifest: None,
                    source_type: Some(crate::types::SourceType::Remote {
                        url: format!("https://example.com/{source_name}/llms.txt"),
                    }),
                },
            },
            toc: vec![TocEntry {
                heading_path: vec!["Getting Started".to_string()],
                lines: "1-50".to_string(),
                anchor: None,
                children: vec![],
            }],
            files: vec![FileInfo {
                path: "llms.txt".to_string(),
                sha256: "deadbeef".to_string(),
            }],
            line_index: LineIndex {
                total_lines: 100,
                byte_offsets: false,
            },
            diagnostics: vec![],
            parse_meta: None,
        }
    }

    #[test]
    fn test_storage_creation_with_root() {
        let temp_dir = TempDir::new().expect("Failed to create temp directory");
        let storage = Storage::with_root(temp_dir.path().to_path_buf());

        assert!(storage.is_ok());
        let _storage = storage.unwrap();

        // Verify root directory was created
        assert!(temp_dir.path().exists());
    }

    #[test]
    fn test_tool_directory_paths() {
        let (storage, _temp_dir) = create_test_storage();

        let tool_dir = storage.tool_dir("react").expect("Should get tool dir");
        let llms_txt_path = storage
            .llms_txt_path("react")
            .expect("Should get llms.txt path");
        let llms_json_path = storage
            .llms_json_path("react")
            .expect("Should get llms.json path");
        let index_dir = storage.index_dir("react").expect("Should get index dir");
        let archive_dir = storage
            .archive_dir("react")
            .expect("Should get archive dir");

        assert!(tool_dir.ends_with("react"));
        assert!(llms_txt_path.ends_with("react/llms.txt"));
        assert!(llms_json_path.ends_with("react/llms.json"));
        assert!(index_dir.ends_with("react/.index"));
        assert!(archive_dir.ends_with("react/.archive"));
    }
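
    // Additional check (grounded in `sanitize_variant_file_name` above):
    // variant filenames are normalized before they are joined onto the
    // per-source directory.
    #[test]
    fn test_variant_file_name_sanitization() {
        // Well-formed names pass through unchanged.
        assert_eq!(Storage::sanitize_variant_file_name("llms.txt"), "llms.txt");
        assert_eq!(
            Storage::sanitize_variant_file_name("llms-full.txt"),
            "llms-full.txt"
        );

        // An empty name falls back to the default content filename.
        assert_eq!(Storage::sanitize_variant_file_name(""), "llms.txt");

        // Traversal attempts are neutralized: no separators or ".." survive.
        let sanitized = Storage::sanitize_variant_file_name("../../etc/passwd");
        assert!(!sanitized.contains(".."));
        assert!(!sanitized.contains('/'));
    }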

    #[test]
    fn test_invalid_alias_validation() {
        let (storage, _temp_dir) = create_test_storage();

        // Test path traversal attempts
        assert!(storage.tool_dir("../etc").is_err());
        assert!(storage.tool_dir("../../passwd").is_err());
        assert!(storage.tool_dir("test/../../../etc").is_err());

        // Test invalid characters
        assert!(storage.tool_dir(".hidden").is_err());
        assert!(storage.tool_dir("test\0null").is_err());
        assert!(storage.tool_dir("test/slash").is_err());
        assert!(storage.tool_dir("test\\backslash").is_err());

        // Test empty alias
        assert!(storage.tool_dir("").is_err());

        // Test valid aliases
        assert!(storage.tool_dir("react").is_ok());
        assert!(storage.tool_dir("my-tool").is_ok());
        assert!(storage.tool_dir("tool_123").is_ok());
    }
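
    // Additional check: length and leading-hyphen constraints from
    // `validate_alias` (MAX_ALIAS_LEN is the shared CLI/storage limit).
    #[test]
    fn test_alias_length_and_leading_hyphen_rejected() {
        let (storage, _temp_dir) = create_test_storage();

        // Leading hyphen is rejected to avoid CLI parsing ambiguity.
        assert!(storage.tool_dir("-react").is_err());

        // Aliases longer than the shared limit are rejected.
        let too_long = "a".repeat(MAX_ALIAS_LEN + 1);
        assert!(storage.tool_dir(&too_long).is_err());

        // An alias at exactly the limit is still accepted.
        let at_limit = "a".repeat(MAX_ALIAS_LEN);
        assert!(storage.tool_dir(&at_limit).is_ok());
    }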

    #[test]
    fn test_ensure_tool_directory() {
        let (storage, _temp_dir) = create_test_storage();

        let tool_dir = storage
            .ensure_tool_dir("react")
            .expect("Should create tool dir");
        assert!(tool_dir.exists());

        // Should be idempotent
        let tool_dir2 = storage
            .ensure_tool_dir("react")
            .expect("Should not fail on existing dir");
        assert_eq!(tool_dir, tool_dir2);
    }
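
    // Additional check: descriptor files live under the config directory
    // (`<config>/sources/<alias>.toml`) and reuse the same alias validation.
    #[test]
    fn test_descriptor_path_layout() {
        let (storage, _temp_dir) = create_test_storage();

        let path = storage
            .descriptor_path("react")
            .expect("Should build descriptor path");
        assert!(path.ends_with("config/sources/react.toml"));

        assert!(storage.descriptor_path("../etc").is_err());
    }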

    #[test]
    fn test_save_and_load_llms_txt() {
        let (storage, _temp_dir) = create_test_storage();

        let content = "# React Documentation\n\nThis is the React documentation...";

        // Save content
        storage
            .save_llms_txt("react", content)
            .expect("Should save llms.txt");

        // Verify file exists
        assert!(
            storage
                .llms_txt_path("react")
                .expect("Should get path")
                .exists()
        );

        // Load content
        let loaded_content = storage
            .load_llms_txt("react")
            .expect("Should load llms.txt");
        assert_eq!(content, loaded_content);
    }
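
    // Additional check: `save_llms_txt` writes via a temporary file and
    // replaces any existing content atomically.
    #[test]
    fn test_save_llms_txt_overwrites_existing() {
        let (storage, _temp_dir) = create_test_storage();

        storage
            .save_llms_txt("react", "first version")
            .expect("Should save initial content");
        storage
            .save_llms_txt("react", "second version")
            .expect("Should overwrite content");

        let loaded = storage.load_llms_txt("react").expect("Should load");
        assert_eq!(loaded, "second version");

        // The temporary file used for the atomic write is cleaned up.
        let tmp_path = storage
            .llms_txt_path("react")
            .expect("Should get path")
            .with_extension("tmp");
        assert!(!tmp_path.exists());
    }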

    #[test]
    fn test_save_and_load_llms_json() {
        let (storage, _temp_dir) = create_test_storage();

        let llms_json = create_test_llms_json("react");

        // Save JSON
        storage
            .save_llms_json("react", &llms_json)
            .expect("Should save llms.json");

        // Verify file exists
        assert!(
            storage
                .llms_json_path("react")
                .expect("Should get path")
                .exists()
        );

        // Load JSON
        let loaded_json = storage
            .load_llms_json("react")
            .expect("Should load llms.json");
        assert_eq!(llms_json.source, loaded_json.source);
        assert_eq!(llms_json.metadata.url, loaded_json.metadata.url);
        assert_eq!(
            llms_json.line_index.total_lines,
            loaded_json.line_index.total_lines
        );
    }

    #[test]
    fn test_source_exists() {
        let (storage, _temp_dir) = create_test_storage();

        // Initially should not exist
        assert!(!storage.exists("react"));

        // After saving llms.json, should exist
        let llms_json = create_test_llms_json("react");
        storage
            .save_llms_json("react", &llms_json)
            .expect("Should save");

        assert!(storage.exists("react"));
    }
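
    // Additional check: metadata roundtrips through `metadata.json`, and a
    // missing file is reported as `None` rather than an error.
    #[test]
    fn test_save_and_load_source_metadata() {
        let (storage, _temp_dir) = create_test_storage();

        assert!(
            storage
                .load_source_metadata("react")
                .expect("Should not error when metadata is missing")
                .is_none()
        );

        let metadata = create_test_llms_json("react").metadata;
        storage
            .save_source_metadata("react", &metadata)
            .expect("Should save metadata");

        let loaded = storage
            .load_source_metadata("react")
            .expect("Should load metadata")
            .expect("Metadata should exist");
        assert_eq!(loaded.url, metadata.url);
        assert_eq!(loaded.sha256, metadata.sha256);
    }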

    #[test]
    fn test_list_sources_empty() {
        let (storage, _temp_dir) = create_test_storage();

        let sources = storage.list_sources();
        assert!(sources.is_empty());
    }

    #[test]
    fn test_list_sources_with_data() {
        let (storage, _temp_dir) = create_test_storage();

        // Add multiple sources
        let aliases = ["react", "nextjs", "rust"];
        for &alias in &aliases {
            let llms_json = create_test_llms_json(alias);
            storage
                .save_llms_json(alias, &llms_json)
                .expect("Should save");
        }

        let sources = storage.list_sources();
        assert_eq!(sources.len(), 3);

        // Should be sorted
        assert_eq!(sources, vec!["nextjs", "react", "rust"]);
    }

    #[test]
    fn test_list_sources_ignores_hidden_dirs() {
        let (storage, temp_dir) = create_test_storage();

        // Create a hidden directory
        let hidden_dir = temp_dir.path().join(".hidden");
        fs::create_dir(&hidden_dir).expect("Should create hidden dir");

        // Create a regular source
        let llms_json = create_test_llms_json("react");
        storage
            .save_llms_json("react", &llms_json)
            .expect("Should save");

        let sources = storage.list_sources();
        assert_eq!(sources.len(), 1);
        assert_eq!(sources[0], "react");
    }

    #[test]
    fn test_list_sources_requires_llms_json() {
        let (storage, _temp_dir) = create_test_storage();

        // Create tool directory without llms.json
        storage
            .ensure_tool_dir("incomplete")
            .expect("Should create dir");

        // Save only llms.txt (no llms.json)
        storage
            .save_llms_txt("incomplete", "# Test content")
            .expect("Should save txt");

        // Create another source with complete data
        let llms_json = create_test_llms_json("complete");
        storage
            .save_llms_json("complete", &llms_json)
            .expect("Should save json");

        let sources = storage.list_sources();
        assert_eq!(sources.len(), 1);
        assert_eq!(sources[0], "complete");
    }

    #[test]
    fn test_archive_functionality() {
        let (storage, _temp_dir) = create_test_storage();

        // Create source data
        let content = "# Test content";
        let llms_json = create_test_llms_json("test");

        storage
            .save_llms_txt("test", content)
            .expect("Should save txt");
        storage
            .save_llms_json("test", &llms_json)
            .expect("Should save json");

        // Archive the source
        storage.archive("test").expect("Should archive");

        // Verify archive directory exists
        let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
        assert!(archive_dir.exists());

        // Verify archived files exist (names contain timestamp)
        let archive_entries: Vec<_> = fs::read_dir(&archive_dir)
            .expect("Should read archive dir")
            .collect::<std::result::Result<Vec<_>, std::io::Error>>()
            .expect("Should collect entries");

        assert_eq!(archive_entries.len(), 2); // llms.txt and llms.json

        // Verify archived files have correct names
        let mut has_txt = false;
        let mut has_json = false;
        for entry in archive_entries {
            let name = entry.file_name().to_string_lossy().to_string();
            if name.contains("llms.txt") {
                has_txt = true;
            }
            if name.contains("llms.json") {
                has_json = true;
            }
        }

        assert!(has_txt, "Should have archived llms.txt");
        assert!(has_json, "Should have archived llms.json");
    }
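
    // Additional check: archiving copies only llms* artifacts and leaves
    // metadata.json out of the snapshot.
    #[test]
    fn test_archive_skips_metadata() {
        let (storage, _temp_dir) = create_test_storage();

        storage
            .save_llms_txt("test", "# Content")
            .expect("Should save txt");
        let metadata = create_test_llms_json("test").metadata;
        storage
            .save_source_metadata("test", &metadata)
            .expect("Should save metadata");

        storage.archive("test").expect("Should archive");

        let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
        for entry in fs::read_dir(&archive_dir).expect("Should read archive dir") {
            let name = entry
                .expect("Should read entry")
                .file_name()
                .to_string_lossy()
                .to_string();
            assert!(name.contains("llms"), "unexpected archived file: {name}");
            assert!(!name.contains("metadata"));
        }
    }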

    #[test]
    fn test_archive_missing_files() {
        let (storage, _temp_dir) = create_test_storage();

        // Archive non-existent source - should not fail
        let result = storage.archive("nonexistent");
        assert!(result.is_ok());

        // Archive directory should still be created
        let archive_dir = storage
            .archive_dir("nonexistent")
            .expect("Should get archive dir");
        assert!(archive_dir.exists());
    }

    #[test]
    fn test_load_missing_files_returns_error() {
        let (storage, _temp_dir) = create_test_storage();

        let result = storage.load_llms_txt("nonexistent");
        assert!(result.is_err());

        let result = storage.load_llms_json("nonexistent");
        assert!(result.is_err());
    }
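
    // Additional check: `clear_cache` removes all sources and recreates an
    // empty data root.
    #[test]
    fn test_clear_cache_removes_sources() {
        let (storage, _temp_dir) = create_test_storage();

        let llms_json = create_test_llms_json("react");
        storage
            .save_llms_json("react", &llms_json)
            .expect("Should save");
        assert!(storage.exists("react"));

        storage.clear_cache().expect("Should clear cache");

        assert!(!storage.exists("react"));
        assert!(storage.root_dir().exists());
        assert!(storage.list_sources().is_empty());
    }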

    #[test]
    fn test_json_serialization_roundtrip() {
        let (storage, _temp_dir) = create_test_storage();

        let original = create_test_llms_json("test");

        // Save and load
        storage
            .save_llms_json("test", &original)
            .expect("Should save");
        let loaded = storage.load_llms_json("test").expect("Should load");

        // Verify all fields are preserved
        assert_eq!(original.source, loaded.source);
        assert_eq!(original.metadata.url, loaded.metadata.url);
        assert_eq!(original.metadata.sha256, loaded.metadata.sha256);
        assert_eq!(original.toc.len(), loaded.toc.len());
        assert_eq!(original.files.len(), loaded.files.len());
        assert_eq!(
            original.line_index.total_lines,
            loaded.line_index.total_lines
        );
        assert_eq!(original.diagnostics.len(), loaded.diagnostics.len());
    }
}