blz_core/
storage.rs

1use crate::{Error, Flavor, LlmsJson, Result, Source};
2use chrono::Utc;
3use directories::ProjectDirs;
4use std::fs;
5use std::path::{Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// Maximum allowed alias length, kept in line with the CLI's alias constraints.
///
/// `Storage::validate_alias` compares this against `alias.len()`, which is a
/// byte count rather than a character count.
const MAX_ALIAS_LEN: usize = 64;
10
/// Local filesystem storage for cached llms.txt documentation.
///
/// Each alias owns one sub-directory of `root_dir`; the methods on this type
/// compute paths inside that directory and read or write the cached files.
///
/// The type holds only a path, so cloning it is cheap and it is printable
/// with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct Storage {
    /// Directory under which one sub-directory per alias is created.
    root_dir: PathBuf,
}
15
16impl Storage {
17    fn sanitize_variant_file_name(name: &str) -> String {
18        // Only allow a conservative set of filename characters to avoid
19        // accidentally writing outside the tool directory or producing
20        // surprising paths. Anything else becomes an underscore so that the
21        // resulting filename stays predictable and safe to use across
22        // platforms.
23        let mut sanitized: String = name
24            .chars()
25            .map(|c| {
26                if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
27                    c
28                } else {
29                    '_'
30                }
31            })
32            .collect();
33
34        // Collapse any ".." segments that could be introduced either by the
35        // caller or by the substitution above. This keeps the path rooted at
36        // the alias directory even if callers pass traversal attempts.
37        while sanitized.contains("..") {
38            sanitized = sanitized.replace("..", "_");
39        }
40
41        if sanitized.is_empty() {
42            "llms.txt".to_string()
43        } else {
44            sanitized
45        }
46    }
47
48    fn flavor_json_filename(flavor: &str) -> String {
49        if flavor.eq_ignore_ascii_case("llms") {
50            "llms.json".to_string()
51        } else {
52            format!("{flavor}.json")
53        }
54    }
55
56    fn flavor_metadata_filename(flavor: &str) -> String {
57        if flavor.eq_ignore_ascii_case("llms") {
58            "metadata.json".to_string()
59        } else {
60            format!("metadata-{flavor}.json")
61        }
62    }
63
64    /// Determine the appropriate flavor based on the requested URL.
65    pub fn flavor_from_url(url: &str) -> Flavor {
66        url.rsplit('/')
67            .next()
68            .and_then(Flavor::from_file_name)
69            .unwrap_or(Flavor::Llms)
70    }
71
72    /// Creates a new storage instance with the default root directory
73    pub fn new() -> Result<Self> {
74        // Test/dev override: allow BLZ_DATA_DIR to set the root directory explicitly
75        if let Ok(dir) = std::env::var("BLZ_DATA_DIR") {
76            let root = PathBuf::from(dir);
77            return Self::with_root(root);
78        }
79
80        let project_dirs = ProjectDirs::from("dev", "outfitter", "blz")
81            .ok_or_else(|| Error::Storage("Failed to determine project directories".into()))?;
82
83        let root_dir = project_dirs.data_dir().to_path_buf();
84
85        // Check for migration from old cache directory
86        Self::check_and_migrate_old_cache(&root_dir);
87
88        Self::with_root(root_dir)
89    }
90
91    /// Creates a new storage instance with a custom root directory
92    pub fn with_root(root_dir: PathBuf) -> Result<Self> {
93        fs::create_dir_all(&root_dir)
94            .map_err(|e| Error::Storage(format!("Failed to create root directory: {e}")))?;
95
96        Ok(Self { root_dir })
97    }
98
99    /// Returns the directory path for a given alias
100    pub fn tool_dir(&self, alias: &str) -> Result<PathBuf> {
101        // Validate alias to prevent directory traversal attacks
102        Self::validate_alias(alias)?;
103        Ok(self.root_dir.join(alias))
104    }
105
106    /// Resolve the on-disk path for a specific flavored content file.
107    fn variant_file_path(&self, alias: &str, file_name: &str) -> Result<PathBuf> {
108        let sanitized = Self::sanitize_variant_file_name(file_name);
109        Ok(self.tool_dir(alias)?.join(sanitized))
110    }
111
112    /// Ensures the directory for an alias exists and returns its path
113    pub fn ensure_tool_dir(&self, alias: &str) -> Result<PathBuf> {
114        let dir = self.tool_dir(alias)?;
115        fs::create_dir_all(&dir)
116            .map_err(|e| Error::Storage(format!("Failed to create tool directory: {e}")))?;
117        Ok(dir)
118    }
119
120    /// Validate that an alias is safe to use as a directory name
121    ///
122    /// This validation is unified with CLI constraints to prevent inconsistencies
123    /// between what the CLI accepts and what storage can handle.
124    fn validate_alias(alias: &str) -> Result<()> {
125        // Check for empty alias
126        if alias.is_empty() {
127            return Err(Error::Storage("Alias cannot be empty".into()));
128        }
129
130        // Disallow leading hyphen to avoid CLI parsing ambiguities
131        if alias.starts_with('-') {
132            return Err(Error::Storage(format!(
133                "Invalid alias '{alias}': cannot start with '-'"
134            )));
135        }
136
137        // Check for path traversal attempts
138        if alias.contains("..") || alias.contains('/') || alias.contains('\\') {
139            return Err(Error::Storage(format!(
140                "Invalid alias '{alias}': contains path traversal characters"
141            )));
142        }
143
144        // Check for special filesystem characters
145        if alias.starts_with('.') || alias.contains('\0') {
146            return Err(Error::Storage(format!(
147                "Invalid alias '{alias}': contains invalid filesystem characters"
148            )));
149        }
150
151        // Check for reserved names on Windows
152        #[cfg(target_os = "windows")]
153        {
154            const RESERVED_NAMES: &[&str] = &[
155                "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
156                "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
157                "LPT9",
158            ];
159
160            let upper_alias = alias.to_uppercase();
161            if RESERVED_NAMES.contains(&upper_alias.as_str()) {
162                return Err(Error::Storage(format!(
163                    "Invalid alias '{}': reserved name on Windows",
164                    alias
165                )));
166            }
167        }
168
169        // Check length (keep consistent with CLI policy)
170        if alias.len() > MAX_ALIAS_LEN {
171            return Err(Error::Storage(format!(
172                "Invalid alias '{alias}': exceeds maximum length of {MAX_ALIAS_LEN} characters"
173            )));
174        }
175
176        // Only allow ASCII alphanumeric, dash, underscore
177        if !alias
178            .chars()
179            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
180        {
181            return Err(Error::Storage(format!(
182                "Invalid alias '{alias}': only [A-Za-z0-9_-] are allowed"
183            )));
184        }
185
186        Ok(())
187    }
188
189    /// Returns the path to the llms.txt file for an alias
190    pub fn llms_txt_path(&self, alias: &str) -> Result<PathBuf> {
191        self.variant_file_path(alias, "llms.txt")
192    }
193
194    /// Returns the path to the llms.json file for an alias
195    pub fn llms_json_path(&self, alias: &str) -> Result<PathBuf> {
196        self.flavor_json_path(alias, "llms")
197    }
198
199    /// Compute the metadata JSON path for a given flavor.
200    pub fn flavor_json_path(&self, alias: &str, flavor: &str) -> Result<PathBuf> {
201        let file = Self::flavor_json_filename(flavor);
202        Ok(self.tool_dir(alias)?.join(file))
203    }
204
205    /// Returns the path to the search index directory for an alias
206    pub fn index_dir(&self, alias: &str) -> Result<PathBuf> {
207        Ok(self.tool_dir(alias)?.join(".index"))
208    }
209
210    /// Returns the path to the archive directory for an alias
211    pub fn archive_dir(&self, alias: &str) -> Result<PathBuf> {
212        Ok(self.tool_dir(alias)?.join(".archive"))
213    }
214
215    /// Returns the path to the metadata file for an alias
216    pub fn metadata_path(&self, alias: &str) -> Result<PathBuf> {
217        self.metadata_path_for_flavor(alias, "llms")
218    }
219
220    /// Compute the metadata path for a given flavor.
221    pub fn metadata_path_for_flavor(&self, alias: &str, flavor: &str) -> Result<PathBuf> {
222        let file = Self::flavor_metadata_filename(flavor);
223        Ok(self.tool_dir(alias)?.join(file))
224    }
225
226    /// Returns the path to the anchors mapping file for an alias
227    pub fn anchors_map_path(&self, alias: &str) -> Result<PathBuf> {
228        Ok(self.tool_dir(alias)?.join("anchors.json"))
229    }
230
231    /// Saves the llms.txt content for an alias
232    pub fn save_llms_txt(&self, alias: &str, content: &str) -> Result<()> {
233        self.save_flavor_content(alias, "llms.txt", content)
234    }
235
236    /// Saves content for a specific flavored variant (e.g., llms-full.txt)
237    pub fn save_flavor_content(&self, alias: &str, file_name: &str, content: &str) -> Result<()> {
238        self.ensure_tool_dir(alias)?;
239        let path = self.variant_file_path(alias, file_name)?;
240
241        let tmp_path = path.with_extension("tmp");
242        fs::write(&tmp_path, content)
243            .map_err(|e| Error::Storage(format!("Failed to write {file_name}: {e}")))?;
244
245        #[cfg(target_os = "windows")]
246        if path.exists() {
247            fs::remove_file(&path).map_err(|e| {
248                Error::Storage(format!("Failed to remove existing {file_name}: {e}"))
249            })?;
250        }
251
252        fs::rename(&tmp_path, &path)
253            .map_err(|e| Error::Storage(format!("Failed to commit {file_name}: {e}")))?;
254
255        debug!("Saved {file_name} for {}", alias);
256        Ok(())
257    }
258
259    /// Loads the llms.txt content for an alias
260    pub fn load_llms_txt(&self, alias: &str) -> Result<String> {
261        let path = self.llms_txt_path(alias)?;
262        fs::read_to_string(&path)
263            .map_err(|e| Error::Storage(format!("Failed to read llms.txt: {e}")))
264    }
265
266    /// Saves the parsed llms.json data for the default flavor
267    pub fn save_llms_json(&self, alias: &str, data: &LlmsJson) -> Result<()> {
268        self.save_flavor_json(alias, "llms", data)
269    }
270
271    /// Saves the parsed llms.json data for a specific flavor
272    pub fn save_flavor_json(&self, alias: &str, flavor: &str, data: &LlmsJson) -> Result<()> {
273        self.ensure_tool_dir(alias)?;
274        let path = self.flavor_json_path(alias, flavor)?;
275        let json = serde_json::to_string_pretty(data)
276            .map_err(|e| Error::Storage(format!("Failed to serialize JSON: {e}")))?;
277
278        let tmp_path = path.with_extension("json.tmp");
279        fs::write(&tmp_path, json)
280            .map_err(|e| Error::Storage(format!("Failed to write {flavor} metadata: {e}")))?;
281
282        #[cfg(target_os = "windows")]
283        if path.exists() {
284            fs::remove_file(&path).map_err(|e| {
285                Error::Storage(format!("Failed to remove existing {flavor} metadata: {e}"))
286            })?;
287        }
288        fs::rename(&tmp_path, &path)
289            .map_err(|e| Error::Storage(format!("Failed to commit {flavor} metadata: {e}")))?;
290
291        debug!("Saved {flavor} metadata for {}", alias);
292        Ok(())
293    }
294
295    /// Loads the parsed llms.json data for the default flavor
296    pub fn load_llms_json(&self, alias: &str) -> Result<LlmsJson> {
297        self.load_flavor_json(alias, "llms").and_then(|opt| {
298            opt.ok_or_else(|| Error::Storage(format!("llms.json missing for alias '{alias}'")))
299        })
300    }
301
302    /// Loads the parsed llms.json data for a specific flavor, returning None if absent
303    pub fn load_flavor_json(&self, alias: &str, flavor: &str) -> Result<Option<LlmsJson>> {
304        let path = self.flavor_json_path(alias, flavor)?;
305        if !path.exists() {
306            return Ok(None);
307        }
308        let json = fs::read_to_string(&path)
309            .map_err(|e| Error::Storage(format!("Failed to read {}: {e}", path.display())))?;
310        let data = serde_json::from_str(&json)
311            .map_err(|e| Error::Storage(format!("Failed to parse JSON: {e}")))?;
312        Ok(Some(data))
313    }
314
315    /// Saves source metadata for an alias
316    pub fn save_source_metadata(&self, alias: &str, source: &Source) -> Result<()> {
317        self.save_source_metadata_for_flavor(alias, "llms", source)
318    }
319
320    /// Persist source metadata for a specific flavor.
321    pub fn save_source_metadata_for_flavor(
322        &self,
323        alias: &str,
324        flavor: &str,
325        source: &Source,
326    ) -> Result<()> {
327        self.ensure_tool_dir(alias)?;
328        let path = self.metadata_path_for_flavor(alias, flavor)?;
329        let json = serde_json::to_string_pretty(source)
330            .map_err(|e| Error::Storage(format!("Failed to serialize metadata: {e}")))?;
331
332        // Write to a temp file first to ensure atomicity
333        let tmp_path = path.with_extension("json.tmp");
334        fs::write(&tmp_path, &json)
335            .map_err(|e| Error::Storage(format!("Failed to write temp metadata: {e}")))?;
336
337        // Atomically rename temp file to final path (handle Windows overwrite)
338        #[cfg(target_os = "windows")]
339        if path.exists() {
340            fs::remove_file(&path)
341                .map_err(|e| Error::Storage(format!("Failed to remove existing metadata: {e}")))?;
342        }
343        fs::rename(&tmp_path, &path)
344            .map_err(|e| Error::Storage(format!("Failed to persist metadata: {e}")))?;
345
346        debug!("Saved {flavor} metadata for {}", alias);
347        Ok(())
348    }
349
350    /// Save anchors remap JSON for an alias
351    pub fn save_anchors_map(&self, alias: &str, map: &crate::AnchorsMap) -> Result<()> {
352        self.ensure_tool_dir(alias)?;
353        let path = self.anchors_map_path(alias)?;
354        let json = serde_json::to_string_pretty(map)
355            .map_err(|e| Error::Storage(format!("Failed to serialize anchors map: {e}")))?;
356        fs::write(&path, json)
357            .map_err(|e| Error::Storage(format!("Failed to write anchors map: {e}")))?;
358        Ok(())
359    }
360
361    /// Loads source metadata for an alias if it exists
362    pub fn load_source_metadata(&self, alias: &str) -> Result<Option<Source>> {
363        self.load_source_metadata_for_flavor(alias, "llms")
364    }
365
366    /// Load source metadata for a specific flavor if present.
367    pub fn load_source_metadata_for_flavor(
368        &self,
369        alias: &str,
370        flavor: &str,
371    ) -> Result<Option<Source>> {
372        let path = self.metadata_path_for_flavor(alias, flavor)?;
373        if !path.exists() {
374            return Ok(None);
375        }
376        let json = fs::read_to_string(&path)
377            .map_err(|e| Error::Storage(format!("Failed to read metadata: {e}")))?;
378        let source = serde_json::from_str(&json)
379            .map_err(|e| Error::Storage(format!("Failed to parse metadata: {e}")))?;
380        Ok(Some(source))
381    }
382
383    /// Checks if an alias exists in storage
384    #[must_use]
385    pub fn exists(&self, alias: &str) -> bool {
386        self.llms_json_path(alias)
387            .map(|path| path.exists())
388            .unwrap_or(false)
389    }
390
391    /// Checks if any flavor has been persisted for the alias.
392    #[must_use]
393    pub fn exists_any_flavor(&self, alias: &str) -> bool {
394        if self.exists(alias) {
395            return true;
396        }
397
398        self.available_flavors(alias)
399            .map(|flavors| !flavors.is_empty())
400            .unwrap_or(false)
401    }
402
403    /// Lists all available documentation flavors persisted for a given alias.
404    ///
405    /// Flavors correspond to the JSON artifacts produced during ingest, e.g.
406    /// `llms.json` and `llms-full.json`. Metadata sidecars (like
407    /// `metadata.json`) and other auxiliary files are excluded.
408    pub fn available_flavors(&self, alias: &str) -> Result<Vec<String>> {
409        let dir = self.tool_dir(alias)?;
410        if !dir.exists() {
411            return Ok(Vec::new());
412        }
413
414        let mut flavors = Vec::new();
415        let entries = fs::read_dir(&dir)
416            .map_err(|e| Error::Storage(format!("Failed to read tool directory: {e}")))?;
417
418        for entry in entries {
419            let entry = entry
420                .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
421            let path = entry.path();
422
423            if !path.is_file() {
424                continue;
425            }
426
427            if !path
428                .extension()
429                .and_then(|ext| ext.to_str())
430                .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
431            {
432                continue;
433            }
434
435            if let (Some(stem), Some(ext)) = (
436                path.file_stem().and_then(|s| s.to_str()),
437                path.extension().and_then(|s| s.to_str()),
438            ) {
439                if !ext.eq_ignore_ascii_case("json") {
440                    continue;
441                }
442
443                // Only include llms*.json artifacts (e.g., llms.json, llms-full.json)
444                let stem_lower = stem.trim().to_ascii_lowercase();
445                if stem_lower == "llms" || stem_lower.starts_with("llms-") {
446                    flavors.push(stem_lower);
447                }
448            }
449        }
450
451        flavors.sort();
452        flavors.dedup();
453        Ok(flavors)
454    }
455
456    /// Lists all cached source aliases
457    #[must_use]
458    pub fn list_sources(&self) -> Vec<String> {
459        let mut sources = Vec::new();
460
461        if let Ok(entries) = fs::read_dir(&self.root_dir) {
462            for entry in entries.flatten() {
463                if entry.path().is_dir() {
464                    if let Some(name) = entry.file_name().to_str() {
465                        if !name.starts_with('.') && self.exists_any_flavor(name) {
466                            sources.push(name.to_string());
467                        }
468                    }
469                }
470            }
471        }
472
473        sources.sort();
474        sources
475    }
476
477    /// Archives the current version of an alias
478    pub fn archive(&self, alias: &str) -> Result<()> {
479        let archive_dir = self.archive_dir(alias)?;
480        fs::create_dir_all(&archive_dir)
481            .map_err(|e| Error::Storage(format!("Failed to create archive directory: {e}")))?;
482
483        // Include seconds for uniqueness and clearer chronology
484        let timestamp = Utc::now().format("%Y-%m-%dT%H-%M-%SZ");
485
486        // Archive all llms*.json and llms*.txt files (multi-flavor support)
487        let dir = self.tool_dir(alias)?;
488        if dir.exists() {
489            for entry in fs::read_dir(&dir)
490                .map_err(|e| Error::Storage(format!("Failed to read dir for archive: {e}")))?
491            {
492                let entry =
493                    entry.map_err(|e| Error::Storage(format!("Failed to read entry: {e}")))?;
494                let path = entry.path();
495                if !path.is_file() {
496                    continue;
497                }
498                let name = entry.file_name();
499                let name_str = name.to_string_lossy().to_lowercase();
500                // Archive only llms*.json / llms*.txt (skip metadata/anchors)
501                let is_json = std::path::Path::new(&name_str)
502                    .extension()
503                    .is_some_and(|ext| ext.eq_ignore_ascii_case("json"));
504                let is_txt = std::path::Path::new(&name_str)
505                    .extension()
506                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"));
507                let is_llms_artifact = (is_json || is_txt) && name_str.starts_with("llms");
508                if is_llms_artifact {
509                    let archive_path =
510                        archive_dir.join(format!("{timestamp}-{}", name.to_string_lossy()));
511                    fs::copy(&path, &archive_path).map_err(|e| {
512                        Error::Storage(format!("Failed to archive {}: {e}", path.display()))
513                    })?;
514                }
515            }
516        }
517
518        info!("Archived {} at {}", alias, timestamp);
519        Ok(())
520    }
521
522    /// Check for old cache directory and migrate if needed
523    fn check_and_migrate_old_cache(new_root: &Path) {
524        // Try to find the old cache directory
525        let old_project_dirs = ProjectDirs::from("dev", "outfitter", "cache");
526
527        if let Some(old_dirs) = old_project_dirs {
528            let old_root = old_dirs.data_dir();
529
530            // Check if old directory exists and has content
531            if old_root.exists() && old_root.is_dir() {
532                // Check if there's actually content to migrate (look for llms.json files)
533                let has_content = fs::read_dir(old_root)
534                    .map(|entries| {
535                        entries.filter_map(std::result::Result::ok).any(|entry| {
536                            let path = entry.path();
537                            if !path.is_dir() {
538                                return false;
539                            }
540                            let has_llms_json = path.join("llms.json").exists();
541                            let has_llms_txt = path.join("llms.txt").exists();
542                            let has_metadata = path.join("metadata.json").exists();
543                            has_llms_json || has_llms_txt || has_metadata
544                        })
545                    })
546                    .unwrap_or(false);
547                if has_content {
548                    // Check if new directory already exists with content
549                    if new_root.exists()
550                        && fs::read_dir(new_root)
551                            .map(|mut e| e.next().is_some())
552                            .unwrap_or(false)
553                    {
554                        // New directory already has content, just log a warning
555                        warn!(
556                            "Found old cache at {} but new cache at {} already exists. \
557                             Manual migration may be needed if you want to preserve old data.",
558                            old_root.display(),
559                            new_root.display()
560                        );
561                    } else {
562                        // Attempt migration
563                        info!(
564                            "Migrating cache from old location {} to new location {}",
565                            old_root.display(),
566                            new_root.display()
567                        );
568
569                        if let Err(e) = Self::migrate_directory(old_root, new_root) {
570                            // Log warning but don't fail - let the user continue with fresh cache
571                            warn!(
572                                "Could not automatically migrate cache: {}. \
573                                 Starting with fresh cache at {}. \
574                                 To manually migrate, copy contents from {} to {}",
575                                e,
576                                new_root.display(),
577                                old_root.display(),
578                                new_root.display()
579                            );
580                        } else {
581                            info!("Successfully migrated cache to new location");
582                        }
583                    }
584                }
585            }
586        }
587    }
588
589    /// Recursively copy directory contents from old to new location
590    fn migrate_directory(from: &Path, to: &Path) -> Result<()> {
591        // Create target directory if it doesn't exist
592        fs::create_dir_all(to)
593            .map_err(|e| Error::Storage(format!("Failed to create migration target: {e}")))?;
594
595        // Copy all entries
596        for entry in fs::read_dir(from)
597            .map_err(|e| Error::Storage(format!("Failed to read migration source: {e}")))?
598        {
599            let entry = entry
600                .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
601            let path = entry.path();
602            let file_name = entry.file_name();
603            let target_path = to.join(&file_name);
604
605            if path.is_dir() {
606                // Recursively copy subdirectory
607                Self::migrate_directory(&path, &target_path)?;
608            } else {
609                // Copy file
610                fs::copy(&path, &target_path).map_err(|e| {
611                    Error::Storage(format!("Failed to copy file during migration: {e}"))
612                })?;
613            }
614        }
615
616        Ok(())
617    }
618}
619
620// Note: Default is not implemented as Storage::new() can fail.
621// Use Storage::new() directly and handle the Result.
622
623#[cfg(test)]
624#[allow(clippy::unwrap_used)]
625mod tests {
626    use super::*;
627    use crate::types::{FileInfo, LineIndex, Source, TocEntry};
628    use std::fs;
629    use tempfile::TempDir;
630
631    fn create_test_storage() -> (Storage, TempDir) {
632        let temp_dir = TempDir::new().expect("Failed to create temp directory");
633        let storage = Storage::with_root(temp_dir.path().to_path_buf())
634            .expect("Failed to create test storage");
635        (storage, temp_dir)
636    }
637
638    fn create_test_llms_json(alias: &str) -> LlmsJson {
639        LlmsJson {
640            alias: alias.to_string(),
641            source: Source {
642                url: format!("https://example.com/{alias}/llms.txt"),
643                etag: Some("abc123".to_string()),
644                last_modified: None,
645                fetched_at: Utc::now(),
646                sha256: "deadbeef".to_string(),
647                aliases: Vec::new(),
648            },
649            toc: vec![TocEntry {
650                heading_path: vec!["Getting Started".to_string()],
651                lines: "1-50".to_string(),
652                anchor: None,
653                children: vec![],
654            }],
655            files: vec![FileInfo {
656                path: "llms.txt".to_string(),
657                sha256: "deadbeef".to_string(),
658            }],
659            line_index: LineIndex {
660                total_lines: 100,
661                byte_offsets: false,
662            },
663            diagnostics: vec![],
664            parse_meta: None,
665        }
666    }
667
668    #[test]
669    fn test_storage_creation_with_root() {
670        let temp_dir = TempDir::new().expect("Failed to create temp directory");
671        let storage = Storage::with_root(temp_dir.path().to_path_buf());
672
673        assert!(storage.is_ok());
674        let _storage = storage.unwrap();
675
676        // Verify root directory was created
677        assert!(temp_dir.path().exists());
678    }
679
680    #[test]
681    fn test_tool_directory_paths() {
682        let (storage, _temp_dir) = create_test_storage();
683
684        let tool_dir = storage.tool_dir("react").expect("Should get tool dir");
685        let llms_txt_path = storage
686            .llms_txt_path("react")
687            .expect("Should get llms.txt path");
688        let llms_json_path = storage
689            .llms_json_path("react")
690            .expect("Should get llms.json path");
691        let index_dir = storage.index_dir("react").expect("Should get index dir");
692        let archive_dir = storage
693            .archive_dir("react")
694            .expect("Should get archive dir");
695
696        assert!(tool_dir.ends_with("react"));
697        assert!(llms_txt_path.ends_with("react/llms.txt"));
698        assert!(llms_json_path.ends_with("react/llms.json"));
699        assert!(index_dir.ends_with("react/.index"));
700        assert!(archive_dir.ends_with("react/.archive"));
701    }
702
703    #[test]
704    fn test_invalid_alias_validation() {
705        let (storage, _temp_dir) = create_test_storage();
706
707        // Test path traversal attempts
708        assert!(storage.tool_dir("../etc").is_err());
709        assert!(storage.tool_dir("../../passwd").is_err());
710        assert!(storage.tool_dir("test/../../../etc").is_err());
711
712        // Test invalid characters
713        assert!(storage.tool_dir(".hidden").is_err());
714        assert!(storage.tool_dir("test\0null").is_err());
715        assert!(storage.tool_dir("test/slash").is_err());
716        assert!(storage.tool_dir("test\\backslash").is_err());
717
718        // Test empty alias
719        assert!(storage.tool_dir("").is_err());
720
721        // Test valid aliases
722        assert!(storage.tool_dir("react").is_ok());
723        assert!(storage.tool_dir("my-tool").is_ok());
724        assert!(storage.tool_dir("tool_123").is_ok());
725    }
726
727    #[test]
728    fn test_ensure_tool_directory() {
729        let (storage, _temp_dir) = create_test_storage();
730
731        let tool_dir = storage
732            .ensure_tool_dir("react")
733            .expect("Should create tool dir");
734        assert!(tool_dir.exists());
735
736        // Should be idempotent
737        let tool_dir2 = storage
738            .ensure_tool_dir("react")
739            .expect("Should not fail on existing dir");
740        assert_eq!(tool_dir, tool_dir2);
741    }
742
743    #[test]
744    fn test_save_and_load_llms_txt() {
745        let (storage, _temp_dir) = create_test_storage();
746
747        let content = "# React Documentation\n\nThis is the React documentation...";
748
749        // Save content
750        storage
751            .save_llms_txt("react", content)
752            .expect("Should save llms.txt");
753
754        // Verify file exists
755        assert!(
756            storage
757                .llms_txt_path("react")
758                .expect("Should get path")
759                .exists()
760        );
761
762        // Load content
763        let loaded_content = storage
764            .load_llms_txt("react")
765            .expect("Should load llms.txt");
766        assert_eq!(content, loaded_content);
767    }
768
769    #[test]
770    fn test_save_and_load_llms_json() {
771        let (storage, _temp_dir) = create_test_storage();
772
773        let llms_json = create_test_llms_json("react");
774
775        // Save JSON
776        storage
777            .save_llms_json("react", &llms_json)
778            .expect("Should save llms.json");
779
780        // Verify file exists
781        assert!(
782            storage
783                .llms_json_path("react")
784                .expect("Should get path")
785                .exists()
786        );
787
788        // Load JSON
789        let loaded_json = storage
790            .load_llms_json("react")
791            .expect("Should load llms.json");
792        assert_eq!(llms_json.alias, loaded_json.alias);
793        assert_eq!(llms_json.source.url, loaded_json.source.url);
794        assert_eq!(
795            llms_json.line_index.total_lines,
796            loaded_json.line_index.total_lines
797        );
798    }
799
800    #[test]
801    fn test_source_exists() {
802        let (storage, _temp_dir) = create_test_storage();
803
804        // Initially should not exist
805        assert!(!storage.exists("react"));
806
807        // After saving llms.json, should exist
808        let llms_json = create_test_llms_json("react");
809        storage
810            .save_llms_json("react", &llms_json)
811            .expect("Should save");
812
813        assert!(storage.exists("react"));
814    }
815
816    #[test]
817    fn test_list_sources_empty() {
818        let (storage, _temp_dir) = create_test_storage();
819
820        let sources = storage.list_sources();
821        assert!(sources.is_empty());
822    }
823
824    #[test]
825    fn test_list_sources_with_data() {
826        let (storage, _temp_dir) = create_test_storage();
827
828        // Add multiple sources
829        let aliases = ["react", "nextjs", "rust"];
830        for &alias in &aliases {
831            let llms_json = create_test_llms_json(alias);
832            storage
833                .save_llms_json(alias, &llms_json)
834                .expect("Should save");
835        }
836
837        let sources = storage.list_sources();
838        assert_eq!(sources.len(), 3);
839
840        // Should be sorted
841        assert_eq!(sources, vec!["nextjs", "react", "rust"]);
842    }
843
844    #[test]
845    fn test_list_sources_ignores_hidden_dirs() {
846        let (storage, temp_dir) = create_test_storage();
847
848        // Create a hidden directory
849        let hidden_dir = temp_dir.path().join(".hidden");
850        fs::create_dir(&hidden_dir).expect("Should create hidden dir");
851
852        // Create a regular source
853        let llms_json = create_test_llms_json("react");
854        storage
855            .save_llms_json("react", &llms_json)
856            .expect("Should save");
857
858        let sources = storage.list_sources();
859        assert_eq!(sources.len(), 1);
860        assert_eq!(sources[0], "react");
861    }
862
863    #[test]
864    fn test_list_sources_requires_llms_json() {
865        let (storage, _temp_dir) = create_test_storage();
866
867        // Create tool directory without llms.json
868        storage
869            .ensure_tool_dir("incomplete")
870            .expect("Should create dir");
871
872        // Save only llms.txt (no llms.json)
873        storage
874            .save_llms_txt("incomplete", "# Test content")
875            .expect("Should save txt");
876
877        // Create another source with complete data
878        let llms_json = create_test_llms_json("complete");
879        storage
880            .save_llms_json("complete", &llms_json)
881            .expect("Should save json");
882
883        let sources = storage.list_sources();
884        assert_eq!(sources.len(), 1);
885        assert_eq!(sources[0], "complete");
886    }
887
888    #[test]
889    fn test_available_flavors_empty_when_alias_missing() {
890        let (storage, _temp_dir) = create_test_storage();
891        let flavors = storage
892            .available_flavors("unknown")
893            .expect("should handle missing alias");
894        assert!(flavors.is_empty());
895    }
896
897    #[test]
898    fn test_available_flavors_lists_variants() {
899        let (storage, _temp_dir) = create_test_storage();
900
901        let llms_json = create_test_llms_json("react");
902        storage
903            .save_flavor_json("react", "llms", &llms_json)
904            .expect("should save llms json");
905        storage
906            .save_flavor_json("react", "llms-full", &llms_json)
907            .expect("should save llms-full json");
908
909        // Metadata files should be ignored
910        let metadata_path = storage
911            .metadata_path_for_flavor("react", "llms-full")
912            .expect("metadata path");
913        fs::write(&metadata_path, "{}").expect("write metadata");
914
915        let flavors = storage
916            .available_flavors("react")
917            .expect("should list flavors");
918        assert_eq!(flavors, vec!["llms".to_string(), "llms-full".to_string()]);
919    }
920
921    #[test]
922    fn test_archive_functionality() {
923        let (storage, _temp_dir) = create_test_storage();
924
925        // Create source data
926        let content = "# Test content";
927        let llms_json = create_test_llms_json("test");
928
929        storage
930            .save_llms_txt("test", content)
931            .expect("Should save txt");
932        storage
933            .save_llms_json("test", &llms_json)
934            .expect("Should save json");
935
936        // Archive the source
937        storage.archive("test").expect("Should archive");
938
939        // Verify archive directory exists
940        let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
941        assert!(archive_dir.exists());
942
943        // Verify archived files exist (names contain timestamp)
944        let archive_entries: Vec<_> = fs::read_dir(&archive_dir)
945            .expect("Should read archive dir")
946            .collect::<std::result::Result<Vec<_>, std::io::Error>>()
947            .expect("Should collect entries");
948
949        assert_eq!(archive_entries.len(), 2); // llms.txt and llms.json
950
951        // Verify archived files have correct names
952        let mut has_txt = false;
953        let mut has_json = false;
954        for entry in archive_entries {
955            let name = entry.file_name().to_string_lossy().to_string();
956            if name.contains("llms.txt") {
957                has_txt = true;
958            }
959            if name.contains("llms.json") {
960                has_json = true;
961            }
962        }
963
964        assert!(has_txt, "Should have archived llms.txt");
965        assert!(has_json, "Should have archived llms.json");
966    }
967
968    #[test]
969    fn test_archive_missing_files() {
970        let (storage, _temp_dir) = create_test_storage();
971
972        // Archive non-existent source - should not fail
973        let result = storage.archive("nonexistent");
974        assert!(result.is_ok());
975
976        // Archive directory should still be created
977        let archive_dir = storage
978            .archive_dir("nonexistent")
979            .expect("Should get archive dir");
980        assert!(archive_dir.exists());
981    }
982
983    #[test]
984    fn test_load_missing_files_returns_error() {
985        let (storage, _temp_dir) = create_test_storage();
986
987        let result = storage.load_llms_txt("nonexistent");
988        assert!(result.is_err());
989
990        let result = storage.load_llms_json("nonexistent");
991        assert!(result.is_err());
992    }
993
994    #[test]
995    fn test_json_serialization_roundtrip() {
996        let (storage, _temp_dir) = create_test_storage();
997
998        let original = create_test_llms_json("test");
999
1000        // Save and load
1001        storage
1002            .save_llms_json("test", &original)
1003            .expect("Should save");
1004        let loaded = storage.load_llms_json("test").expect("Should load");
1005
1006        // Verify all fields are preserved
1007        assert_eq!(original.alias, loaded.alias);
1008        assert_eq!(original.source.url, loaded.source.url);
1009        assert_eq!(original.source.sha256, loaded.source.sha256);
1010        assert_eq!(original.toc.len(), loaded.toc.len());
1011        assert_eq!(original.files.len(), loaded.files.len());
1012        assert_eq!(
1013            original.line_index.total_lines,
1014            loaded.line_index.total_lines
1015        );
1016        assert_eq!(original.diagnostics.len(), loaded.diagnostics.len());
1017    }
1018}