Skip to main content

blz_core/
storage.rs

1use crate::{Error, LlmsJson, Result, Source, SourceDescriptor, profile};
2use chrono::Utc;
3use directories::{BaseDirs, ProjectDirs};
4use std::fs;
5use std::path::{Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// Maximum allowed alias length to match CLI constraints.
///
/// `validate_alias` compares this against `str::len`, so the limit is
/// measured in bytes.
const MAX_ALIAS_LEN: usize = 64;
10
/// Local filesystem storage for cached llms.txt documentation.
///
/// Every path handed out by this type is derived from one of the two roots
/// stored here.
#[derive(Debug)]
pub struct Storage {
    /// Data root; per-source directories live under `<root_dir>/sources/<alias>`.
    root_dir: PathBuf,
    /// Configuration root; source descriptors live under `<config_dir>/sources`.
    config_dir: PathBuf,
}
16
17impl Storage {
18    fn sanitize_variant_file_name(name: &str) -> String {
19        // Only allow a conservative set of filename characters to avoid
20        // accidentally writing outside the tool directory or producing
21        // surprising paths. Anything else becomes an underscore so that the
22        // resulting filename stays predictable and safe to use across
23        // platforms.
24        let mut sanitized: String = name
25            .chars()
26            .map(|c| {
27                if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
28                    c
29                } else {
30                    '_'
31                }
32            })
33            .collect();
34
35        // Collapse any ".." segments that could be introduced either by the
36        // caller or by the substitution above. This keeps the path rooted at
37        // the alias directory even if callers pass traversal attempts.
38        while sanitized.contains("..") {
39            sanitized = sanitized.replace("..", "_");
40        }
41
42        if sanitized.is_empty() {
43            "llms.txt".to_string()
44        } else {
45            sanitized
46        }
47    }
48
49    // Storage uses consistent filenames regardless of source URL:
50    // - llms.txt for content (even if fetched from llms-full.txt)
51    // - llms.json for parsed data
52    // - metadata.json for source metadata
53
54    /// Creates a new storage instance with the default root directory.
55    ///
56    /// # Errors
57    ///
58    /// Returns an error if the data or config directories cannot be resolved or created.
59    pub fn new() -> Result<Self> {
60        // Test/dev override: allow BLZ_DATA_DIR to set the root directory explicitly
61        if let Ok(dir) = std::env::var("BLZ_DATA_DIR") {
62            let root = PathBuf::from(dir);
63            let config_dir = Self::default_config_dir()?;
64            return Self::with_paths(root, config_dir);
65        }
66
67        // Use XDG_DATA_HOME if explicitly set
68        let root_dir = if let Ok(xdg) = std::env::var("XDG_DATA_HOME") {
69            let trimmed = xdg.trim();
70            if trimmed.is_empty() {
71                Self::fallback_data_dir()?
72            } else {
73                PathBuf::from(trimmed).join(profile::app_dir_slug())
74            }
75        } else {
76            Self::fallback_data_dir()?
77        };
78
79        // Check for migration from old cache directory
80        Self::check_and_migrate_old_cache(&root_dir);
81
82        let config_dir = Self::default_config_dir()?;
83        Self::with_paths(root_dir, config_dir)
84    }
85
86    /// Fallback data directory when `XDG_DATA_HOME` is not set
87    fn fallback_data_dir() -> Result<PathBuf> {
88        // Use ~/.blz/ for data (same location as config for non-XDG systems)
89        let home = directories::BaseDirs::new()
90            .ok_or_else(|| Error::Storage("Failed to determine home directory".into()))?;
91        Ok(home.home_dir().join(profile::dot_dir_slug()))
92    }
93
94    /// Determine the default configuration directory honoring overrides
95    fn default_config_dir() -> Result<PathBuf> {
96        if let Ok(dir) = std::env::var("BLZ_CONFIG_DIR") {
97            let trimmed = dir.trim();
98            if !trimmed.is_empty() {
99                return Ok(PathBuf::from(trimmed));
100            }
101        }
102
103        if let Ok(dir) = std::env::var("BLZ_GLOBAL_CONFIG_DIR") {
104            let trimmed = dir.trim();
105            if !trimmed.is_empty() {
106                return Ok(PathBuf::from(trimmed));
107            }
108        }
109
110        if let Ok(xdg) = std::env::var("XDG_CONFIG_HOME") {
111            let trimmed = xdg.trim();
112            if !trimmed.is_empty() {
113                return Ok(PathBuf::from(trimmed).join(profile::app_dir_slug()));
114            }
115        }
116
117        if let Some(base_dirs) = BaseDirs::new() {
118            return Ok(base_dirs.home_dir().join(profile::dot_dir_slug()));
119        }
120
121        Err(Error::Storage(
122            "Failed to determine configuration directory".into(),
123        ))
124    }
125
126    /// Creates a new storage instance with a custom root directory.
127    ///
128    /// # Errors
129    ///
130    /// Returns an error if the data or config directories cannot be created.
131    pub fn with_root(root_dir: PathBuf) -> Result<Self> {
132        let config_dir = root_dir.join("config");
133        Self::with_paths(root_dir, config_dir)
134    }
135
136    /// Creates a new storage instance with explicit data and config directories.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if the data or config directories cannot be created.
141    pub fn with_paths(root_dir: PathBuf, config_dir: PathBuf) -> Result<Self> {
142        fs::create_dir_all(&root_dir)
143            .map_err(|e| Error::Storage(format!("Failed to create root directory: {e}")))?;
144        fs::create_dir_all(&config_dir)
145            .map_err(|e| Error::Storage(format!("Failed to create config directory: {e}")))?;
146
147        Ok(Self {
148            root_dir,
149            config_dir,
150        })
151    }
152
153    /// Returns the root data directory path
154    #[must_use]
155    pub fn root_dir(&self) -> &Path {
156        &self.root_dir
157    }
158
159    /// Returns the root configuration directory path used for descriptors
160    #[must_use]
161    pub fn config_dir(&self) -> &Path {
162        &self.config_dir
163    }
164
165    fn descriptors_dir(&self) -> PathBuf {
166        self.config_dir.join("sources")
167    }
168
169    /// Returns the path to the descriptor TOML for a source.
170    ///
171    /// # Errors
172    ///
173    /// Returns an error if the alias is invalid.
174    pub fn descriptor_path(&self, alias: &str) -> Result<PathBuf> {
175        Self::validate_alias(alias)?;
176        Ok(self.descriptors_dir().join(format!("{alias}.toml")))
177    }
178
179    /// Persist a descriptor to disk, creating parent directories if necessary.
180    ///
181    /// # Errors
182    ///
183    /// Returns an error if the descriptor cannot be serialized or written.
184    pub fn save_descriptor(&self, descriptor: &SourceDescriptor) -> Result<()> {
185        let path = self.descriptor_path(&descriptor.alias)?;
186        if let Some(parent) = path.parent() {
187            fs::create_dir_all(parent)
188                .map_err(|e| Error::Storage(format!("Failed to create descriptor dir: {e}")))?;
189        }
190
191        let toml = toml::to_string_pretty(descriptor)
192            .map_err(|e| Error::Storage(format!("Failed to serialize descriptor: {e}")))?;
193        fs::write(&path, toml)
194            .map_err(|e| Error::Storage(format!("Failed to write descriptor: {e}")))?;
195        Ok(())
196    }
197
198    /// Load a descriptor if it exists.
199    ///
200    /// # Errors
201    ///
202    /// Returns an error if the descriptor cannot be read or parsed.
203    pub fn load_descriptor(&self, alias: &str) -> Result<Option<SourceDescriptor>> {
204        let path = self.descriptor_path(alias)?;
205        if !path.exists() {
206            return Ok(None);
207        }
208
209        let contents = fs::read_to_string(&path)
210            .map_err(|e| Error::Storage(format!("Failed to read descriptor: {e}")))?;
211        let descriptor = toml::from_str::<SourceDescriptor>(&contents)
212            .map_err(|e| Error::Storage(format!("Failed to parse descriptor: {e}")))?;
213        Ok(Some(descriptor))
214    }
215
216    /// Remove descriptor file for an alias if present.
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if the descriptor cannot be removed.
221    pub fn remove_descriptor(&self, alias: &str) -> Result<()> {
222        let path = self.descriptor_path(alias)?;
223        if path.exists() {
224            fs::remove_file(&path)
225                .map_err(|e| Error::Storage(format!("Failed to remove descriptor: {e}")))?;
226        }
227        Ok(())
228    }
229
230    /// Returns the directory path for a given alias.
231    ///
232    /// # Errors
233    ///
234    /// Returns an error if the alias is invalid.
235    pub fn tool_dir(&self, source: &str) -> Result<PathBuf> {
236        // Validate alias to prevent directory traversal attacks
237        Self::validate_alias(source)?;
238        Ok(self.root_dir.join("sources").join(source))
239    }
240
241    /// Resolve the on-disk path for a specific flavored content file.
242    fn variant_file_path(&self, source: &str, file_name: &str) -> Result<PathBuf> {
243        let sanitized = Self::sanitize_variant_file_name(file_name);
244        Ok(self.tool_dir(source)?.join(sanitized))
245    }
246
247    /// Ensures the directory for an alias exists and returns its path.
248    ///
249    /// # Errors
250    ///
251    /// Returns an error if the alias is invalid or the directory cannot be created.
252    pub fn ensure_tool_dir(&self, source: &str) -> Result<PathBuf> {
253        let dir = self.tool_dir(source)?;
254        fs::create_dir_all(&dir)
255            .map_err(|e| Error::Storage(format!("Failed to create tool directory: {e}")))?;
256        Ok(dir)
257    }
258
259    /// Validate that an alias is safe to use as a directory name
260    ///
261    /// This validation is unified with CLI constraints to prevent inconsistencies
262    /// between what the CLI accepts and what storage can handle.
263    fn validate_alias(alias: &str) -> Result<()> {
264        // Check for empty alias
265        if alias.is_empty() {
266            return Err(Error::Storage("Alias cannot be empty".into()));
267        }
268
269        // Disallow leading hyphen to avoid CLI parsing ambiguities
270        if alias.starts_with('-') {
271            return Err(Error::Storage(format!(
272                "Invalid alias '{alias}': cannot start with '-'"
273            )));
274        }
275
276        // Check for path traversal attempts
277        if alias.contains("..") || alias.contains('/') || alias.contains('\\') {
278            return Err(Error::Storage(format!(
279                "Invalid alias '{alias}': contains path traversal characters"
280            )));
281        }
282
283        // Check for special filesystem characters
284        if alias.starts_with('.') || alias.contains('\0') {
285            return Err(Error::Storage(format!(
286                "Invalid alias '{alias}': contains invalid filesystem characters"
287            )));
288        }
289
290        // Check for reserved names on Windows
291        #[cfg(target_os = "windows")]
292        {
293            const RESERVED_NAMES: &[&str] = &[
294                "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
295                "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
296                "LPT9",
297            ];
298
299            let upper_alias = alias.to_uppercase();
300            if RESERVED_NAMES.contains(&upper_alias.as_str()) {
301                return Err(Error::Storage(format!(
302                    "Invalid alias '{}': reserved name on Windows",
303                    alias
304                )));
305            }
306        }
307
308        // Check length (keep consistent with CLI policy)
309        if alias.len() > MAX_ALIAS_LEN {
310            return Err(Error::Storage(format!(
311                "Invalid alias '{alias}': exceeds maximum length of {MAX_ALIAS_LEN} characters"
312            )));
313        }
314
315        // Only allow ASCII alphanumeric, dash, underscore
316        if !alias
317            .chars()
318            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
319        {
320            return Err(Error::Storage(format!(
321                "Invalid alias '{alias}': only [A-Za-z0-9_-] are allowed"
322            )));
323        }
324
325        Ok(())
326    }
327
328    /// Returns the path to the llms.txt file for a source.
329    ///
330    /// # Errors
331    ///
332    /// Returns an error if the alias is invalid.
333    pub fn llms_txt_path(&self, source: &str) -> Result<PathBuf> {
334        self.variant_file_path(source, "llms.txt")
335    }
336
337    /// Returns the path to the llms.json file for a source.
338    ///
339    /// # Errors
340    ///
341    /// Returns an error if the alias is invalid.
342    pub fn llms_json_path(&self, source: &str) -> Result<PathBuf> {
343        Ok(self.tool_dir(source)?.join("llms.json"))
344    }
345
346    /// Returns the path to the search index directory for a source.
347    ///
348    /// # Errors
349    ///
350    /// Returns an error if the alias is invalid.
351    pub fn index_dir(&self, source: &str) -> Result<PathBuf> {
352        Ok(self.tool_dir(source)?.join(".index"))
353    }
354
355    /// Returns the path to the archive directory for a source.
356    ///
357    /// # Errors
358    ///
359    /// Returns an error if the alias is invalid.
360    pub fn archive_dir(&self, source: &str) -> Result<PathBuf> {
361        Ok(self.tool_dir(source)?.join(".archive"))
362    }
363
364    /// Returns the path to the metadata file for a source.
365    ///
366    /// # Errors
367    ///
368    /// Returns an error if the alias is invalid.
369    pub fn metadata_path(&self, source: &str) -> Result<PathBuf> {
370        Ok(self.tool_dir(source)?.join("metadata.json"))
371    }
372
373    /// Returns the path to the anchors mapping file for a source.
374    ///
375    /// # Errors
376    ///
377    /// Returns an error if the alias is invalid.
378    pub fn anchors_map_path(&self, source: &str) -> Result<PathBuf> {
379        Ok(self.tool_dir(source)?.join("anchors.json"))
380    }
381
382    /// Saves the llms.txt content for a source.
383    ///
384    /// # Errors
385    ///
386    /// Returns an error if the file cannot be written or renamed.
387    pub fn save_llms_txt(&self, source: &str, content: &str) -> Result<()> {
388        self.ensure_tool_dir(source)?;
389        let path = self.llms_txt_path(source)?;
390
391        let tmp_path = path.with_extension("tmp");
392        fs::write(&tmp_path, content)
393            .map_err(|e| Error::Storage(format!("Failed to write llms.txt: {e}")))?;
394
395        #[cfg(target_os = "windows")]
396        if path.exists() {
397            fs::remove_file(&path)
398                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.txt: {e}")))?;
399        }
400
401        fs::rename(&tmp_path, &path)
402            .map_err(|e| Error::Storage(format!("Failed to commit llms.txt: {e}")))?;
403
404        debug!("Saved llms.txt for {}", source);
405        Ok(())
406    }
407
408    /// Loads the llms.txt content for a source.
409    ///
410    /// # Errors
411    ///
412    /// Returns an error if the file cannot be read.
413    pub fn load_llms_txt(&self, source: &str) -> Result<String> {
414        let path = self.llms_txt_path(source)?;
415        fs::read_to_string(&path)
416            .map_err(|e| Error::Storage(format!("Failed to read llms.txt: {e}")))
417    }
418
419    /// Saves the parsed llms.json data for a source.
420    ///
421    /// # Errors
422    ///
423    /// Returns an error if the JSON cannot be serialized or written.
424    pub fn save_llms_json(&self, source: &str, data: &LlmsJson) -> Result<()> {
425        self.ensure_tool_dir(source)?;
426        let path = self.llms_json_path(source)?;
427        let json = serde_json::to_string_pretty(data)
428            .map_err(|e| Error::Storage(format!("Failed to serialize JSON: {e}")))?;
429
430        let tmp_path = path.with_extension("json.tmp");
431        fs::write(&tmp_path, json)
432            .map_err(|e| Error::Storage(format!("Failed to write llms.json: {e}")))?;
433
434        #[cfg(target_os = "windows")]
435        if path.exists() {
436            fs::remove_file(&path)
437                .map_err(|e| Error::Storage(format!("Failed to remove existing llms.json: {e}")))?;
438        }
439        fs::rename(&tmp_path, &path)
440            .map_err(|e| Error::Storage(format!("Failed to commit llms.json: {e}")))?;
441
442        debug!("Saved llms.json for {}", source);
443        Ok(())
444    }
445
446    /// Loads the parsed llms.json data for a source.
447    ///
448    /// # Errors
449    ///
450    /// Returns an error if the file cannot be read or the JSON is invalid.
451    pub fn load_llms_json(&self, source: &str) -> Result<LlmsJson> {
452        let path = self.llms_json_path(source)?;
453        if !path.exists() {
454            return Err(Error::Storage(format!(
455                "llms.json missing for source '{source}'"
456            )));
457        }
458        let json = fs::read_to_string(&path)
459            .map_err(|e| Error::Storage(format!("Failed to read llms.json: {e}")))?;
460
461        // Try to detect old v0.4.x format
462        if let Ok(raw_value) = serde_json::from_str::<serde_json::Value>(&json) {
463            if let Some(obj) = raw_value.as_object() {
464                // Old format has "alias" field instead of "source"
465                if obj.contains_key("alias")
466                    || (obj.contains_key("source") && obj["source"].is_object())
467                {
468                    return Err(Error::Storage(format!(
469                        "Incompatible cache format detected for source '{source}'.\n\n\
470                         This cache was created with blz v0.4.x or earlier and is not compatible with v0.5.0+.\n\n\
471                         To fix this, clear your cache:\n  \
472                         blz clear --force\n\n\
473                         Then re-add your sources."
474                    )));
475                }
476            }
477        }
478
479        let data = serde_json::from_str(&json)
480            .map_err(|e| Error::Storage(format!("Failed to parse llms.json: {e}")))?;
481        Ok(data)
482    }
483
484    /// Saves source metadata for a source.
485    ///
486    /// # Errors
487    ///
488    /// Returns an error if the metadata cannot be serialized or written.
489    pub fn save_source_metadata(&self, source: &str, metadata: &Source) -> Result<()> {
490        self.ensure_tool_dir(source)?;
491        let path = self.metadata_path(source)?;
492        let json = serde_json::to_string_pretty(metadata)
493            .map_err(|e| Error::Storage(format!("Failed to serialize metadata: {e}")))?;
494
495        // Write to a temp file first to ensure atomicity
496        let tmp_path = path.with_extension("json.tmp");
497        fs::write(&tmp_path, &json)
498            .map_err(|e| Error::Storage(format!("Failed to write temp metadata: {e}")))?;
499
500        // Atomically rename temp file to final path (handle Windows overwrite)
501        #[cfg(target_os = "windows")]
502        if path.exists() {
503            fs::remove_file(&path)
504                .map_err(|e| Error::Storage(format!("Failed to remove existing metadata: {e}")))?;
505        }
506        fs::rename(&tmp_path, &path)
507            .map_err(|e| Error::Storage(format!("Failed to persist metadata: {e}")))?;
508
509        debug!("Saved metadata for {}", source);
510        Ok(())
511    }
512
513    /// Save anchors remap JSON for a source.
514    ///
515    /// # Errors
516    ///
517    /// Returns an error if the anchors map cannot be serialized or written.
518    pub fn save_anchors_map(&self, source: &str, map: &crate::AnchorsMap) -> Result<()> {
519        self.ensure_tool_dir(source)?;
520        let path = self.anchors_map_path(source)?;
521        let json = serde_json::to_string_pretty(map)
522            .map_err(|e| Error::Storage(format!("Failed to serialize anchors map: {e}")))?;
523        fs::write(&path, json)
524            .map_err(|e| Error::Storage(format!("Failed to write anchors map: {e}")))?;
525        Ok(())
526    }
527
528    /// Loads source metadata for a source if it exists.
529    ///
530    /// # Errors
531    ///
532    /// Returns an error if the metadata cannot be read or parsed.
533    pub fn load_source_metadata(&self, source: &str) -> Result<Option<Source>> {
534        let path = self.metadata_path(source)?;
535        if !path.exists() {
536            return Ok(None);
537        }
538        let json = fs::read_to_string(&path)
539            .map_err(|e| Error::Storage(format!("Failed to read metadata: {e}")))?;
540        let metadata = serde_json::from_str(&json)
541            .map_err(|e| Error::Storage(format!("Failed to parse metadata: {e}")))?;
542        Ok(Some(metadata))
543    }
544
545    /// Checks if a source exists in storage
546    #[must_use]
547    pub fn exists(&self, source: &str) -> bool {
548        self.llms_json_path(source)
549            .map(|path| path.exists())
550            .unwrap_or(false)
551    }
552
553    /// Lists all cached sources
554    #[must_use]
555    pub fn list_sources(&self) -> Vec<String> {
556        let mut sources = Vec::new();
557        let sources_dir = self.root_dir.join("sources");
558
559        if let Ok(entries) = fs::read_dir(&sources_dir) {
560            for entry in entries.flatten() {
561                if entry.path().is_dir() {
562                    if let Some(name) = entry.file_name().to_str() {
563                        if !name.starts_with('.') && self.exists(name) {
564                            sources.push(name.to_string());
565                        }
566                    }
567                }
568            }
569        }
570
571        sources.sort();
572        sources
573    }
574
575    /// Clears the entire cache directory, removing all sources and their data.
576    ///
577    /// This is a destructive operation that cannot be undone. Use with caution.
578    ///
579    /// # Errors
580    ///
581    /// Returns an error if the cache directory cannot be removed or recreated.
582    pub fn clear_cache(&self) -> Result<()> {
583        // Remove the entire root directory
584        if self.root_dir.exists() {
585            fs::remove_dir_all(&self.root_dir)
586                .map_err(|e| Error::Storage(format!("Failed to remove cache directory: {e}")))?;
587        }
588
589        // Recreate empty root directory
590        fs::create_dir_all(&self.root_dir)
591            .map_err(|e| Error::Storage(format!("Failed to recreate cache directory: {e}")))?;
592
593        Ok(())
594    }
595
596    /// Archives the current version of a source.
597    ///
598    /// # Errors
599    ///
600    /// Returns an error if the archive directory cannot be created or files cannot be copied.
601    pub fn archive(&self, source: &str) -> Result<()> {
602        let archive_dir = self.archive_dir(source)?;
603        fs::create_dir_all(&archive_dir)
604            .map_err(|e| Error::Storage(format!("Failed to create archive directory: {e}")))?;
605
606        // Include seconds for uniqueness and clearer chronology
607        let timestamp = Utc::now().format("%Y-%m-%dT%H-%M-%SZ");
608
609        // Archive all llms*.json and llms*.txt files
610        let dir = self.tool_dir(source)?;
611        if dir.exists() {
612            for entry in fs::read_dir(&dir)
613                .map_err(|e| Error::Storage(format!("Failed to read dir for archive: {e}")))?
614            {
615                let entry =
616                    entry.map_err(|e| Error::Storage(format!("Failed to read entry: {e}")))?;
617                let path = entry.path();
618                if !path.is_file() {
619                    continue;
620                }
621                let name = entry.file_name();
622                let name_str = name.to_string_lossy().to_lowercase();
623                // Archive only llms*.json / llms*.txt (skip metadata/anchors)
624                let is_json = std::path::Path::new(&name_str)
625                    .extension()
626                    .is_some_and(|ext| ext.eq_ignore_ascii_case("json"));
627                let is_txt = std::path::Path::new(&name_str)
628                    .extension()
629                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"));
630                let is_llms_artifact = (is_json || is_txt) && name_str.starts_with("llms");
631                if is_llms_artifact {
632                    let archive_path =
633                        archive_dir.join(format!("{timestamp}-{}", name.to_string_lossy()));
634                    fs::copy(&path, &archive_path).map_err(|e| {
635                        Error::Storage(format!("Failed to archive {}: {e}", path.display()))
636                    })?;
637                }
638            }
639        }
640
641        info!("Archived {} at {}", source, timestamp);
642        Ok(())
643    }
644
645    /// Check for old cache directory and migrate if needed
646    fn check_and_migrate_old_cache(new_root: &Path) {
647        // Try to find the old cache directory
648        let old_project_dirs = ProjectDirs::from("dev", "outfitter", "cache");
649
650        if let Some(old_dirs) = old_project_dirs {
651            let old_root = old_dirs.data_dir();
652
653            // Check if old directory exists and has content
654            if old_root.exists() && old_root.is_dir() {
655                // Check if there's actually content to migrate (look for llms.json files)
656                let has_content = fs::read_dir(old_root)
657                    .map(|entries| {
658                        entries.filter_map(std::result::Result::ok).any(|entry| {
659                            let path = entry.path();
660                            if !path.is_dir() {
661                                return false;
662                            }
663                            let has_llms_json = path.join("llms.json").exists();
664                            let has_llms_txt = path.join("llms.txt").exists();
665                            let has_metadata = path.join("metadata.json").exists();
666                            has_llms_json || has_llms_txt || has_metadata
667                        })
668                    })
669                    .unwrap_or(false);
670                if has_content {
671                    // Check if new directory already exists with content
672                    if new_root.exists()
673                        && fs::read_dir(new_root)
674                            .map(|mut e| e.next().is_some())
675                            .unwrap_or(false)
676                    {
677                        // New directory already has content, just log a warning
678                        warn!(
679                            "Found old cache at {} but new cache at {} already exists. \
680                             Manual migration may be needed if you want to preserve old data.",
681                            old_root.display(),
682                            new_root.display()
683                        );
684                    } else {
685                        // Attempt migration
686                        info!(
687                            "Migrating cache from old location {} to new location {}",
688                            old_root.display(),
689                            new_root.display()
690                        );
691
692                        if let Err(e) = Self::migrate_directory(old_root, new_root) {
693                            // Log warning but don't fail - let the user continue with fresh cache
694                            warn!(
695                                "Could not automatically migrate cache: {}. \
696                                 Starting with fresh cache at {}. \
697                                 To manually migrate, copy contents from {} to {}",
698                                e,
699                                new_root.display(),
700                                old_root.display(),
701                                new_root.display()
702                            );
703                        } else {
704                            info!("Successfully migrated cache to new location");
705                        }
706                    }
707                }
708            }
709        }
710    }
711
712    /// Recursively copy directory contents from old to new location
713    fn migrate_directory(from: &Path, to: &Path) -> Result<()> {
714        // Create target directory if it doesn't exist
715        fs::create_dir_all(to)
716            .map_err(|e| Error::Storage(format!("Failed to create migration target: {e}")))?;
717
718        // Copy all entries
719        for entry in fs::read_dir(from)
720            .map_err(|e| Error::Storage(format!("Failed to read migration source: {e}")))?
721        {
722            let entry = entry
723                .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
724            let path = entry.path();
725            let file_name = entry.file_name();
726            let target_path = to.join(&file_name);
727
728            if path.is_dir() {
729                // Recursively copy subdirectory
730                Self::migrate_directory(&path, &target_path)?;
731            } else {
732                // Copy file
733                fs::copy(&path, &target_path).map_err(|e| {
734                    Error::Storage(format!("Failed to copy file during migration: {e}"))
735                })?;
736            }
737        }
738
739        Ok(())
740    }
741}
742
743// Note: Default is not implemented as Storage::new() can fail.
744// Use Storage::new() directly and handle the Result.
745
746#[cfg(test)]
747#[allow(clippy::unwrap_used)]
748mod tests {
749    use super::*;
750    use crate::types::{FileInfo, LineIndex, Source, SourceVariant, TocEntry};
751    use std::fs;
752    use tempfile::TempDir;
753
754    fn create_test_storage() -> (Storage, TempDir) {
755        let temp_dir = TempDir::new().expect("Failed to create temp directory");
756        let storage = Storage::with_root(temp_dir.path().to_path_buf())
757            .expect("Failed to create test storage");
758        (storage, temp_dir)
759    }
760
761    fn create_test_llms_json(source_name: &str) -> LlmsJson {
762        LlmsJson {
763            source: source_name.to_string(),
764            metadata: Source {
765                url: format!("https://example.com/{source_name}/llms.txt"),
766                etag: Some("abc123".to_string()),
767                last_modified: None,
768                fetched_at: Utc::now(),
769                sha256: "deadbeef".to_string(),
770                variant: SourceVariant::Llms,
771                aliases: Vec::new(),
772                tags: Vec::new(),
773                description: None,
774                category: None,
775                npm_aliases: Vec::new(),
776                github_aliases: Vec::new(),
777                origin: crate::types::SourceOrigin {
778                    manifest: None,
779                    source_type: Some(crate::types::SourceType::Remote {
780                        url: format!("https://example.com/{source_name}/llms.txt"),
781                    }),
782                },
783                filter_non_english: None,
784            },
785            toc: vec![TocEntry {
786                heading_path: vec!["Getting Started".to_string()],
787                heading_path_display: Some(vec!["Getting Started".to_string()]),
788                heading_path_normalized: Some(vec!["getting started".to_string()]),
789                lines: "1-50".to_string(),
790                anchor: None,
791                children: vec![],
792            }],
793            files: vec![FileInfo {
794                path: "llms.txt".to_string(),
795                sha256: "deadbeef".to_string(),
796            }],
797            line_index: LineIndex {
798                total_lines: 100,
799                byte_offsets: false,
800            },
801            diagnostics: vec![],
802            parse_meta: None,
803            filter_stats: None,
804        }
805    }
806
807    #[test]
808    fn test_storage_creation_with_root() {
809        let temp_dir = TempDir::new().expect("Failed to create temp directory");
810        let storage = Storage::with_root(temp_dir.path().to_path_buf());
811
812        assert!(storage.is_ok());
813        let _storage = storage.unwrap();
814
815        // Verify root directory was created
816        assert!(temp_dir.path().exists());
817    }
818
819    #[test]
820    fn test_tool_directory_paths() {
821        let (storage, _temp_dir) = create_test_storage();
822
823        let tool_dir = storage.tool_dir("react").expect("Should get tool dir");
824        let llms_txt_path = storage
825            .llms_txt_path("react")
826            .expect("Should get llms.txt path");
827        let llms_json_path = storage
828            .llms_json_path("react")
829            .expect("Should get llms.json path");
830        let index_dir = storage.index_dir("react").expect("Should get index dir");
831        let archive_dir = storage
832            .archive_dir("react")
833            .expect("Should get archive dir");
834
835        assert!(tool_dir.ends_with("react"));
836        assert!(llms_txt_path.ends_with("react/llms.txt"));
837        assert!(llms_json_path.ends_with("react/llms.json"));
838        assert!(index_dir.ends_with("react/.index"));
839        assert!(archive_dir.ends_with("react/.archive"));
840    }
841
842    #[test]
843    fn test_invalid_alias_validation() {
844        let (storage, _temp_dir) = create_test_storage();
845
846        // Test path traversal attempts
847        assert!(storage.tool_dir("../etc").is_err());
848        assert!(storage.tool_dir("../../passwd").is_err());
849        assert!(storage.tool_dir("test/../../../etc").is_err());
850
851        // Test invalid characters
852        assert!(storage.tool_dir(".hidden").is_err());
853        assert!(storage.tool_dir("test\0null").is_err());
854        assert!(storage.tool_dir("test/slash").is_err());
855        assert!(storage.tool_dir("test\\backslash").is_err());
856
857        // Test empty alias
858        assert!(storage.tool_dir("").is_err());
859
860        // Test valid aliases
861        assert!(storage.tool_dir("react").is_ok());
862        assert!(storage.tool_dir("my-tool").is_ok());
863        assert!(storage.tool_dir("tool_123").is_ok());
864    }
865
866    #[test]
867    fn test_ensure_tool_directory() {
868        let (storage, _temp_dir) = create_test_storage();
869
870        let tool_dir = storage
871            .ensure_tool_dir("react")
872            .expect("Should create tool dir");
873        assert!(tool_dir.exists());
874
875        // Should be idempotent
876        let tool_dir2 = storage
877            .ensure_tool_dir("react")
878            .expect("Should not fail on existing dir");
879        assert_eq!(tool_dir, tool_dir2);
880    }
881
882    #[test]
883    fn test_save_and_load_llms_txt() {
884        let (storage, _temp_dir) = create_test_storage();
885
886        let content = "# React Documentation\n\nThis is the React documentation...";
887
888        // Save content
889        storage
890            .save_llms_txt("react", content)
891            .expect("Should save llms.txt");
892
893        // Verify file exists
894        assert!(
895            storage
896                .llms_txt_path("react")
897                .expect("Should get path")
898                .exists()
899        );
900
901        // Load content
902        let loaded_content = storage
903            .load_llms_txt("react")
904            .expect("Should load llms.txt");
905        assert_eq!(content, loaded_content);
906    }
907
908    #[test]
909    fn test_save_and_load_llms_json() {
910        let (storage, _temp_dir) = create_test_storage();
911
912        let llms_json = create_test_llms_json("react");
913
914        // Save JSON
915        storage
916            .save_llms_json("react", &llms_json)
917            .expect("Should save llms.json");
918
919        // Verify file exists
920        assert!(
921            storage
922                .llms_json_path("react")
923                .expect("Should get path")
924                .exists()
925        );
926
927        // Load JSON
928        let loaded_json = storage
929            .load_llms_json("react")
930            .expect("Should load llms.json");
931        assert_eq!(llms_json.source, loaded_json.source);
932        assert_eq!(llms_json.metadata.url, loaded_json.metadata.url);
933        assert_eq!(
934            llms_json.line_index.total_lines,
935            loaded_json.line_index.total_lines
936        );
937    }
938
939    #[test]
940    fn test_source_exists() {
941        let (storage, _temp_dir) = create_test_storage();
942
943        // Initially should not exist
944        assert!(!storage.exists("react"));
945
946        // After saving llms.json, should exist
947        let llms_json = create_test_llms_json("react");
948        storage
949            .save_llms_json("react", &llms_json)
950            .expect("Should save");
951
952        assert!(storage.exists("react"));
953    }
954
955    #[test]
956    fn test_list_sources_empty() {
957        let (storage, _temp_dir) = create_test_storage();
958
959        let sources = storage.list_sources();
960        assert!(sources.is_empty());
961    }
962
963    #[test]
964    fn test_list_sources_with_data() {
965        let (storage, _temp_dir) = create_test_storage();
966
967        // Add multiple sources
968        let aliases = ["react", "nextjs", "rust"];
969        for &alias in &aliases {
970            let llms_json = create_test_llms_json(alias);
971            storage
972                .save_llms_json(alias, &llms_json)
973                .expect("Should save");
974        }
975
976        let sources = storage.list_sources();
977        assert_eq!(sources.len(), 3);
978
979        // Should be sorted
980        assert_eq!(sources, vec!["nextjs", "react", "rust"]);
981    }
982
983    #[test]
984    fn test_list_sources_ignores_hidden_dirs() {
985        let (storage, temp_dir) = create_test_storage();
986
987        // Create a hidden directory
988        let hidden_dir = temp_dir.path().join(".hidden");
989        fs::create_dir(&hidden_dir).expect("Should create hidden dir");
990
991        // Create a regular source
992        let llms_json = create_test_llms_json("react");
993        storage
994            .save_llms_json("react", &llms_json)
995            .expect("Should save");
996
997        let sources = storage.list_sources();
998        assert_eq!(sources.len(), 1);
999        assert_eq!(sources[0], "react");
1000    }
1001
1002    #[test]
1003    fn test_list_sources_requires_llms_json() {
1004        let (storage, _temp_dir) = create_test_storage();
1005
1006        // Create tool directory without llms.json
1007        storage
1008            .ensure_tool_dir("incomplete")
1009            .expect("Should create dir");
1010
1011        // Save only llms.txt (no llms.json)
1012        storage
1013            .save_llms_txt("incomplete", "# Test content")
1014            .expect("Should save txt");
1015
1016        // Create another source with complete data
1017        let llms_json = create_test_llms_json("complete");
1018        storage
1019            .save_llms_json("complete", &llms_json)
1020            .expect("Should save json");
1021
1022        let sources = storage.list_sources();
1023        assert_eq!(sources.len(), 1);
1024        assert_eq!(sources[0], "complete");
1025    }
1026
1027    #[test]
1028    fn test_archive_functionality() {
1029        let (storage, _temp_dir) = create_test_storage();
1030
1031        // Create source data
1032        let content = "# Test content";
1033        let llms_json = create_test_llms_json("test");
1034
1035        storage
1036            .save_llms_txt("test", content)
1037            .expect("Should save txt");
1038        storage
1039            .save_llms_json("test", &llms_json)
1040            .expect("Should save json");
1041
1042        // Archive the source
1043        storage.archive("test").expect("Should archive");
1044
1045        // Verify archive directory exists
1046        let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
1047        assert!(archive_dir.exists());
1048
1049        // Verify archived files exist (names contain timestamp)
1050        let archive_entries: Vec<_> = fs::read_dir(&archive_dir)
1051            .expect("Should read archive dir")
1052            .collect::<std::result::Result<Vec<_>, std::io::Error>>()
1053            .expect("Should collect entries");
1054
1055        assert_eq!(archive_entries.len(), 2); // llms.txt and llms.json
1056
1057        // Verify archived files have correct names
1058        let mut has_txt = false;
1059        let mut has_json = false;
1060        for entry in archive_entries {
1061            let name = entry.file_name().to_string_lossy().to_string();
1062            if name.contains("llms.txt") {
1063                has_txt = true;
1064            }
1065            if name.contains("llms.json") {
1066                has_json = true;
1067            }
1068        }
1069
1070        assert!(has_txt, "Should have archived llms.txt");
1071        assert!(has_json, "Should have archived llms.json");
1072    }
1073
1074    #[test]
1075    fn test_archive_missing_files() {
1076        let (storage, _temp_dir) = create_test_storage();
1077
1078        // Archive non-existent source - should not fail
1079        let result = storage.archive("nonexistent");
1080        assert!(result.is_ok());
1081
1082        // Archive directory should still be created
1083        let archive_dir = storage
1084            .archive_dir("nonexistent")
1085            .expect("Should get archive dir");
1086        assert!(archive_dir.exists());
1087    }
1088
1089    #[test]
1090    fn test_load_missing_files_returns_error() {
1091        let (storage, _temp_dir) = create_test_storage();
1092
1093        let result = storage.load_llms_txt("nonexistent");
1094        assert!(result.is_err());
1095
1096        let result = storage.load_llms_json("nonexistent");
1097        assert!(result.is_err());
1098    }
1099
1100    #[test]
1101    fn test_json_serialization_roundtrip() {
1102        let (storage, _temp_dir) = create_test_storage();
1103
1104        let original = create_test_llms_json("test");
1105
1106        // Save and load
1107        storage
1108            .save_llms_json("test", &original)
1109            .expect("Should save");
1110        let loaded = storage.load_llms_json("test").expect("Should load");
1111
1112        // Verify all fields are preserved
1113        assert_eq!(original.source, loaded.source);
1114        assert_eq!(original.metadata.url, loaded.metadata.url);
1115        assert_eq!(original.metadata.sha256, loaded.metadata.sha256);
1116        assert_eq!(original.toc.len(), loaded.toc.len());
1117        assert_eq!(original.files.len(), loaded.files.len());
1118        assert_eq!(
1119            original.line_index.total_lines,
1120            loaded.line_index.total_lines
1121        );
1122        assert_eq!(original.diagnostics.len(), loaded.diagnostics.len());
1123    }
1124}