1use crate::{Error, LlmsJson, Result, Source};
2use chrono::Utc;
3use directories::ProjectDirs;
4use std::fs;
5use std::path::{Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// Upper bound on alias length enforced by `Storage::validate_alias`.
///
/// The limit is compared against `str::len`, i.e. a byte count.
const MAX_ALIAS_LEN: usize = 64;
10
/// Filesystem-backed store that keeps one subdirectory per source alias.
///
/// Every path handed out by this type is derived from `root_dir`.
pub struct Storage {
    /// Directory under which each alias gets its own subdirectory.
    root_dir: PathBuf,
}
15
16impl Storage {
17 pub fn new() -> Result<Self> {
19 if let Ok(dir) = std::env::var("BLZ_DATA_DIR") {
21 let root = PathBuf::from(dir);
22 return Self::with_root(root);
23 }
24
25 let project_dirs = ProjectDirs::from("dev", "outfitter", "blz")
26 .ok_or_else(|| Error::Storage("Failed to determine project directories".into()))?;
27
28 let root_dir = project_dirs.data_dir().to_path_buf();
29
30 Self::check_and_migrate_old_cache(&root_dir);
32
33 Self::with_root(root_dir)
34 }
35
36 pub fn with_root(root_dir: PathBuf) -> Result<Self> {
38 fs::create_dir_all(&root_dir)
39 .map_err(|e| Error::Storage(format!("Failed to create root directory: {e}")))?;
40
41 Ok(Self { root_dir })
42 }
43
44 pub fn tool_dir(&self, alias: &str) -> Result<PathBuf> {
46 Self::validate_alias(alias)?;
48 Ok(self.root_dir.join(alias))
49 }
50
51 pub fn ensure_tool_dir(&self, alias: &str) -> Result<PathBuf> {
53 let dir = self.tool_dir(alias)?;
54 fs::create_dir_all(&dir)
55 .map_err(|e| Error::Storage(format!("Failed to create tool directory: {e}")))?;
56 Ok(dir)
57 }
58
59 fn validate_alias(alias: &str) -> Result<()> {
64 if alias.is_empty() {
66 return Err(Error::Storage("Alias cannot be empty".into()));
67 }
68
69 if alias.starts_with('-') {
71 return Err(Error::Storage(format!(
72 "Invalid alias '{alias}': cannot start with '-'"
73 )));
74 }
75
76 if alias.contains("..") || alias.contains('/') || alias.contains('\\') {
78 return Err(Error::Storage(format!(
79 "Invalid alias '{alias}': contains path traversal characters"
80 )));
81 }
82
83 if alias.starts_with('.') || alias.contains('\0') {
85 return Err(Error::Storage(format!(
86 "Invalid alias '{alias}': contains invalid filesystem characters"
87 )));
88 }
89
90 #[cfg(target_os = "windows")]
92 {
93 const RESERVED_NAMES: &[&str] = &[
94 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
95 "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
96 "LPT9",
97 ];
98
99 let upper_alias = alias.to_uppercase();
100 if RESERVED_NAMES.contains(&upper_alias.as_str()) {
101 return Err(Error::Storage(format!(
102 "Invalid alias '{}': reserved name on Windows",
103 alias
104 )));
105 }
106 }
107
108 if alias.len() > MAX_ALIAS_LEN {
110 return Err(Error::Storage(format!(
111 "Invalid alias '{alias}': exceeds maximum length of {MAX_ALIAS_LEN} characters"
112 )));
113 }
114
115 if !alias
117 .chars()
118 .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
119 {
120 return Err(Error::Storage(format!(
121 "Invalid alias '{alias}': only [A-Za-z0-9_-] are allowed"
122 )));
123 }
124
125 Ok(())
126 }
127
128 pub fn llms_txt_path(&self, alias: &str) -> Result<PathBuf> {
130 Ok(self.tool_dir(alias)?.join("llms.txt"))
131 }
132
133 pub fn llms_json_path(&self, alias: &str) -> Result<PathBuf> {
135 Ok(self.tool_dir(alias)?.join("llms.json"))
136 }
137
138 pub fn index_dir(&self, alias: &str) -> Result<PathBuf> {
140 Ok(self.tool_dir(alias)?.join(".index"))
141 }
142
143 pub fn archive_dir(&self, alias: &str) -> Result<PathBuf> {
145 Ok(self.tool_dir(alias)?.join(".archive"))
146 }
147
148 pub fn metadata_path(&self, alias: &str) -> Result<PathBuf> {
150 Ok(self.tool_dir(alias)?.join("metadata.json"))
151 }
152
153 pub fn anchors_map_path(&self, alias: &str) -> Result<PathBuf> {
155 Ok(self.tool_dir(alias)?.join("anchors.json"))
156 }
157
158 pub fn save_llms_txt(&self, alias: &str, content: &str) -> Result<()> {
160 self.ensure_tool_dir(alias)?;
161 let path = self.llms_txt_path(alias)?;
162
163 let tmp_path = path.with_extension("txt.tmp");
165 fs::write(&tmp_path, content)
166 .map_err(|e| Error::Storage(format!("Failed to write llms.txt: {e}")))?;
167
168 #[cfg(target_os = "windows")]
170 if path.exists() {
171 fs::remove_file(&path)
172 .map_err(|e| Error::Storage(format!("Failed to remove existing llms.txt: {e}")))?;
173 }
174 fs::rename(&tmp_path, &path)
175 .map_err(|e| Error::Storage(format!("Failed to commit llms.txt: {e}")))?;
176
177 debug!("Saved llms.txt for {}", alias);
178 Ok(())
179 }
180
181 pub fn load_llms_txt(&self, alias: &str) -> Result<String> {
183 let path = self.llms_txt_path(alias)?;
184 fs::read_to_string(&path)
185 .map_err(|e| Error::Storage(format!("Failed to read llms.txt: {e}")))
186 }
187
188 pub fn save_llms_json(&self, alias: &str, data: &LlmsJson) -> Result<()> {
190 self.ensure_tool_dir(alias)?;
191 let path = self.llms_json_path(alias)?;
192 let json = serde_json::to_string_pretty(data)
193 .map_err(|e| Error::Storage(format!("Failed to serialize JSON: {e}")))?;
194
195 let tmp_path = path.with_extension("json.tmp");
197 fs::write(&tmp_path, json)
198 .map_err(|e| Error::Storage(format!("Failed to write llms.json: {e}")))?;
199
200 #[cfg(target_os = "windows")]
202 if path.exists() {
203 fs::remove_file(&path)
204 .map_err(|e| Error::Storage(format!("Failed to remove existing llms.json: {e}")))?;
205 }
206 fs::rename(&tmp_path, &path)
207 .map_err(|e| Error::Storage(format!("Failed to commit llms.json: {e}")))?;
208
209 debug!("Saved llms.json for {}", alias);
210 Ok(())
211 }
212
213 pub fn load_llms_json(&self, alias: &str) -> Result<LlmsJson> {
215 let path = self.llms_json_path(alias)?;
216 let json = fs::read_to_string(&path)
217 .map_err(|e| Error::Storage(format!("Failed to read llms.json: {e}")))?;
218 serde_json::from_str(&json)
219 .map_err(|e| Error::Storage(format!("Failed to parse JSON: {e}")))
220 }
221
222 pub fn save_source_metadata(&self, alias: &str, source: &Source) -> Result<()> {
224 self.ensure_tool_dir(alias)?;
225 let path = self.metadata_path(alias)?;
226 let json = serde_json::to_string_pretty(source)
227 .map_err(|e| Error::Storage(format!("Failed to serialize metadata: {e}")))?;
228
229 let tmp_path = path.with_extension("json.tmp");
231 fs::write(&tmp_path, &json)
232 .map_err(|e| Error::Storage(format!("Failed to write temp metadata: {e}")))?;
233
234 #[cfg(target_os = "windows")]
236 if path.exists() {
237 fs::remove_file(&path)
238 .map_err(|e| Error::Storage(format!("Failed to remove existing metadata: {e}")))?;
239 }
240 fs::rename(&tmp_path, &path)
241 .map_err(|e| Error::Storage(format!("Failed to persist metadata: {e}")))?;
242
243 debug!("Saved metadata for {}", alias);
244 Ok(())
245 }
246
247 pub fn save_anchors_map(&self, alias: &str, map: &crate::AnchorsMap) -> Result<()> {
249 self.ensure_tool_dir(alias)?;
250 let path = self.anchors_map_path(alias)?;
251 let json = serde_json::to_string_pretty(map)
252 .map_err(|e| Error::Storage(format!("Failed to serialize anchors map: {e}")))?;
253 fs::write(&path, json)
254 .map_err(|e| Error::Storage(format!("Failed to write anchors map: {e}")))?;
255 Ok(())
256 }
257
258 pub fn load_source_metadata(&self, alias: &str) -> Result<Option<Source>> {
260 let path = self.metadata_path(alias)?;
261 if !path.exists() {
262 return Ok(None);
263 }
264 let json = fs::read_to_string(&path)
265 .map_err(|e| Error::Storage(format!("Failed to read metadata: {e}")))?;
266 let source = serde_json::from_str(&json)
267 .map_err(|e| Error::Storage(format!("Failed to parse metadata: {e}")))?;
268 Ok(Some(source))
269 }
270
271 #[must_use]
273 pub fn exists(&self, alias: &str) -> bool {
274 self.llms_json_path(alias)
275 .map(|path| path.exists())
276 .unwrap_or(false)
277 }
278
279 #[must_use]
281 pub fn list_sources(&self) -> Vec<String> {
282 let mut sources = Vec::new();
283
284 if let Ok(entries) = fs::read_dir(&self.root_dir) {
285 for entry in entries.flatten() {
286 if entry.path().is_dir() {
287 if let Some(name) = entry.file_name().to_str() {
288 if !name.starts_with('.') && self.exists(name) {
289 sources.push(name.to_string());
290 }
291 }
292 }
293 }
294 }
295
296 sources.sort();
297 sources
298 }
299
300 pub fn archive(&self, alias: &str) -> Result<()> {
302 let archive_dir = self.archive_dir(alias)?;
303 fs::create_dir_all(&archive_dir)
304 .map_err(|e| Error::Storage(format!("Failed to create archive directory: {e}")))?;
305
306 let timestamp = Utc::now().format("%Y-%m-%dT%H-%M-%SZ");
308
309 let llms_txt = self.llms_txt_path(alias)?;
310 if llms_txt.exists() {
311 let archive_path = archive_dir.join(format!("{timestamp}-llms.txt"));
312 fs::copy(&llms_txt, &archive_path)
313 .map_err(|e| Error::Storage(format!("Failed to archive llms.txt: {e}")))?;
314 }
315
316 let llms_json = self.llms_json_path(alias)?;
317 if llms_json.exists() {
318 let archive_path = archive_dir.join(format!("{timestamp}-llms.json"));
319 fs::copy(&llms_json, &archive_path)
320 .map_err(|e| Error::Storage(format!("Failed to archive llms.json: {e}")))?;
321 }
322
323 info!("Archived {} at {}", alias, timestamp);
324 Ok(())
325 }
326
327 fn check_and_migrate_old_cache(new_root: &Path) {
329 let old_project_dirs = ProjectDirs::from("dev", "outfitter", "cache");
331
332 if let Some(old_dirs) = old_project_dirs {
333 let old_root = old_dirs.data_dir();
334
335 if old_root.exists() && old_root.is_dir() {
337 let has_content = fs::read_dir(old_root)
339 .map(|entries| {
340 entries.filter_map(std::result::Result::ok).any(|entry| {
341 let path = entry.path();
342 if !path.is_dir() {
343 return false;
344 }
345 let has_llms_json = path.join("llms.json").exists();
346 let has_llms_txt = path.join("llms.txt").exists();
347 let has_metadata = path.join("metadata.json").exists();
348 has_llms_json || has_llms_txt || has_metadata
349 })
350 })
351 .unwrap_or(false);
352 if has_content {
353 if new_root.exists()
355 && fs::read_dir(new_root)
356 .map(|mut e| e.next().is_some())
357 .unwrap_or(false)
358 {
359 warn!(
361 "Found old cache at {} but new cache at {} already exists. \
362 Manual migration may be needed if you want to preserve old data.",
363 old_root.display(),
364 new_root.display()
365 );
366 } else {
367 info!(
369 "Migrating cache from old location {} to new location {}",
370 old_root.display(),
371 new_root.display()
372 );
373
374 if let Err(e) = Self::migrate_directory(old_root, new_root) {
375 warn!(
377 "Could not automatically migrate cache: {}. \
378 Starting with fresh cache at {}. \
379 To manually migrate, copy contents from {} to {}",
380 e,
381 new_root.display(),
382 old_root.display(),
383 new_root.display()
384 );
385 } else {
386 info!("Successfully migrated cache to new location");
387 }
388 }
389 }
390 }
391 }
392 }
393
394 fn migrate_directory(from: &Path, to: &Path) -> Result<()> {
396 fs::create_dir_all(to)
398 .map_err(|e| Error::Storage(format!("Failed to create migration target: {e}")))?;
399
400 for entry in fs::read_dir(from)
402 .map_err(|e| Error::Storage(format!("Failed to read migration source: {e}")))?
403 {
404 let entry = entry
405 .map_err(|e| Error::Storage(format!("Failed to read directory entry: {e}")))?;
406 let path = entry.path();
407 let file_name = entry.file_name();
408 let target_path = to.join(&file_name);
409
410 if path.is_dir() {
411 Self::migrate_directory(&path, &target_path)?;
413 } else {
414 fs::copy(&path, &target_path).map_err(|e| {
416 Error::Storage(format!("Failed to copy file during migration: {e}"))
417 })?;
418 }
419 }
420
421 Ok(())
422 }
423}
424
425#[cfg(test)]
429#[allow(clippy::unwrap_used)]
430mod tests {
431 use super::*;
432 use crate::types::{FileInfo, LineIndex, Source, TocEntry};
433 use std::fs;
434 use tempfile::TempDir;
435
436 fn create_test_storage() -> (Storage, TempDir) {
437 let temp_dir = TempDir::new().expect("Failed to create temp directory");
438 let storage = Storage::with_root(temp_dir.path().to_path_buf())
439 .expect("Failed to create test storage");
440 (storage, temp_dir)
441 }
442
443 fn create_test_llms_json(alias: &str) -> LlmsJson {
444 LlmsJson {
445 alias: alias.to_string(),
446 source: Source {
447 url: format!("https://example.com/{alias}/llms.txt"),
448 etag: Some("abc123".to_string()),
449 last_modified: None,
450 fetched_at: Utc::now(),
451 sha256: "deadbeef".to_string(),
452 aliases: Vec::new(),
453 },
454 toc: vec![TocEntry {
455 heading_path: vec!["Getting Started".to_string()],
456 lines: "1-50".to_string(),
457 anchor: None,
458 children: vec![],
459 }],
460 files: vec![FileInfo {
461 path: "llms.txt".to_string(),
462 sha256: "deadbeef".to_string(),
463 }],
464 line_index: LineIndex {
465 total_lines: 100,
466 byte_offsets: false,
467 },
468 diagnostics: vec![],
469 parse_meta: None,
470 }
471 }
472
473 #[test]
474 fn test_storage_creation_with_root() {
475 let temp_dir = TempDir::new().expect("Failed to create temp directory");
476 let storage = Storage::with_root(temp_dir.path().to_path_buf());
477
478 assert!(storage.is_ok());
479 let _storage = storage.unwrap();
480
481 assert!(temp_dir.path().exists());
483 }
484
485 #[test]
486 fn test_tool_directory_paths() {
487 let (storage, _temp_dir) = create_test_storage();
488
489 let tool_dir = storage.tool_dir("react").expect("Should get tool dir");
490 let llms_txt_path = storage
491 .llms_txt_path("react")
492 .expect("Should get llms.txt path");
493 let llms_json_path = storage
494 .llms_json_path("react")
495 .expect("Should get llms.json path");
496 let index_dir = storage.index_dir("react").expect("Should get index dir");
497 let archive_dir = storage
498 .archive_dir("react")
499 .expect("Should get archive dir");
500
501 assert!(tool_dir.ends_with("react"));
502 assert!(llms_txt_path.ends_with("react/llms.txt"));
503 assert!(llms_json_path.ends_with("react/llms.json"));
504 assert!(index_dir.ends_with("react/.index"));
505 assert!(archive_dir.ends_with("react/.archive"));
506 }
507
508 #[test]
509 fn test_invalid_alias_validation() {
510 let (storage, _temp_dir) = create_test_storage();
511
512 assert!(storage.tool_dir("../etc").is_err());
514 assert!(storage.tool_dir("../../passwd").is_err());
515 assert!(storage.tool_dir("test/../../../etc").is_err());
516
517 assert!(storage.tool_dir(".hidden").is_err());
519 assert!(storage.tool_dir("test\0null").is_err());
520 assert!(storage.tool_dir("test/slash").is_err());
521 assert!(storage.tool_dir("test\\backslash").is_err());
522
523 assert!(storage.tool_dir("").is_err());
525
526 assert!(storage.tool_dir("react").is_ok());
528 assert!(storage.tool_dir("my-tool").is_ok());
529 assert!(storage.tool_dir("tool_123").is_ok());
530 }
531
532 #[test]
533 fn test_ensure_tool_directory() {
534 let (storage, _temp_dir) = create_test_storage();
535
536 let tool_dir = storage
537 .ensure_tool_dir("react")
538 .expect("Should create tool dir");
539 assert!(tool_dir.exists());
540
541 let tool_dir2 = storage
543 .ensure_tool_dir("react")
544 .expect("Should not fail on existing dir");
545 assert_eq!(tool_dir, tool_dir2);
546 }
547
548 #[test]
549 fn test_save_and_load_llms_txt() {
550 let (storage, _temp_dir) = create_test_storage();
551
552 let content = "# React Documentation\n\nThis is the React documentation...";
553
554 storage
556 .save_llms_txt("react", content)
557 .expect("Should save llms.txt");
558
559 assert!(
561 storage
562 .llms_txt_path("react")
563 .expect("Should get path")
564 .exists()
565 );
566
567 let loaded_content = storage
569 .load_llms_txt("react")
570 .expect("Should load llms.txt");
571 assert_eq!(content, loaded_content);
572 }
573
574 #[test]
575 fn test_save_and_load_llms_json() {
576 let (storage, _temp_dir) = create_test_storage();
577
578 let llms_json = create_test_llms_json("react");
579
580 storage
582 .save_llms_json("react", &llms_json)
583 .expect("Should save llms.json");
584
585 assert!(
587 storage
588 .llms_json_path("react")
589 .expect("Should get path")
590 .exists()
591 );
592
593 let loaded_json = storage
595 .load_llms_json("react")
596 .expect("Should load llms.json");
597 assert_eq!(llms_json.alias, loaded_json.alias);
598 assert_eq!(llms_json.source.url, loaded_json.source.url);
599 assert_eq!(
600 llms_json.line_index.total_lines,
601 loaded_json.line_index.total_lines
602 );
603 }
604
605 #[test]
606 fn test_source_exists() {
607 let (storage, _temp_dir) = create_test_storage();
608
609 assert!(!storage.exists("react"));
611
612 let llms_json = create_test_llms_json("react");
614 storage
615 .save_llms_json("react", &llms_json)
616 .expect("Should save");
617
618 assert!(storage.exists("react"));
619 }
620
621 #[test]
622 fn test_list_sources_empty() {
623 let (storage, _temp_dir) = create_test_storage();
624
625 let sources = storage.list_sources();
626 assert!(sources.is_empty());
627 }
628
629 #[test]
630 fn test_list_sources_with_data() {
631 let (storage, _temp_dir) = create_test_storage();
632
633 let aliases = ["react", "nextjs", "rust"];
635 for &alias in &aliases {
636 let llms_json = create_test_llms_json(alias);
637 storage
638 .save_llms_json(alias, &llms_json)
639 .expect("Should save");
640 }
641
642 let sources = storage.list_sources();
643 assert_eq!(sources.len(), 3);
644
645 assert_eq!(sources, vec!["nextjs", "react", "rust"]);
647 }
648
649 #[test]
650 fn test_list_sources_ignores_hidden_dirs() {
651 let (storage, temp_dir) = create_test_storage();
652
653 let hidden_dir = temp_dir.path().join(".hidden");
655 fs::create_dir(&hidden_dir).expect("Should create hidden dir");
656
657 let llms_json = create_test_llms_json("react");
659 storage
660 .save_llms_json("react", &llms_json)
661 .expect("Should save");
662
663 let sources = storage.list_sources();
664 assert_eq!(sources.len(), 1);
665 assert_eq!(sources[0], "react");
666 }
667
668 #[test]
669 fn test_list_sources_requires_llms_json() {
670 let (storage, _temp_dir) = create_test_storage();
671
672 storage
674 .ensure_tool_dir("incomplete")
675 .expect("Should create dir");
676
677 storage
679 .save_llms_txt("incomplete", "# Test content")
680 .expect("Should save txt");
681
682 let llms_json = create_test_llms_json("complete");
684 storage
685 .save_llms_json("complete", &llms_json)
686 .expect("Should save json");
687
688 let sources = storage.list_sources();
689 assert_eq!(sources.len(), 1);
690 assert_eq!(sources[0], "complete");
691 }
692
693 #[test]
694 fn test_archive_functionality() {
695 let (storage, _temp_dir) = create_test_storage();
696
697 let content = "# Test content";
699 let llms_json = create_test_llms_json("test");
700
701 storage
702 .save_llms_txt("test", content)
703 .expect("Should save txt");
704 storage
705 .save_llms_json("test", &llms_json)
706 .expect("Should save json");
707
708 storage.archive("test").expect("Should archive");
710
711 let archive_dir = storage.archive_dir("test").expect("Should get archive dir");
713 assert!(archive_dir.exists());
714
715 let archive_entries: Vec<_> = fs::read_dir(&archive_dir)
717 .expect("Should read archive dir")
718 .collect::<std::result::Result<Vec<_>, std::io::Error>>()
719 .expect("Should collect entries");
720
721 assert_eq!(archive_entries.len(), 2); let mut has_txt = false;
725 let mut has_json = false;
726 for entry in archive_entries {
727 let name = entry.file_name().to_string_lossy().to_string();
728 if name.contains("llms.txt") {
729 has_txt = true;
730 }
731 if name.contains("llms.json") {
732 has_json = true;
733 }
734 }
735
736 assert!(has_txt, "Should have archived llms.txt");
737 assert!(has_json, "Should have archived llms.json");
738 }
739
740 #[test]
741 fn test_archive_missing_files() {
742 let (storage, _temp_dir) = create_test_storage();
743
744 let result = storage.archive("nonexistent");
746 assert!(result.is_ok());
747
748 let archive_dir = storage
750 .archive_dir("nonexistent")
751 .expect("Should get archive dir");
752 assert!(archive_dir.exists());
753 }
754
755 #[test]
756 fn test_load_missing_files_returns_error() {
757 let (storage, _temp_dir) = create_test_storage();
758
759 let result = storage.load_llms_txt("nonexistent");
760 assert!(result.is_err());
761
762 let result = storage.load_llms_json("nonexistent");
763 assert!(result.is_err());
764 }
765
766 #[test]
767 fn test_json_serialization_roundtrip() {
768 let (storage, _temp_dir) = create_test_storage();
769
770 let original = create_test_llms_json("test");
771
772 storage
774 .save_llms_json("test", &original)
775 .expect("Should save");
776 let loaded = storage.load_llms_json("test").expect("Should load");
777
778 assert_eq!(original.alias, loaded.alias);
780 assert_eq!(original.source.url, loaded.source.url);
781 assert_eq!(original.source.sha256, loaded.source.sha256);
782 assert_eq!(original.toc.len(), loaded.toc.len());
783 assert_eq!(original.files.len(), loaded.files.len());
784 assert_eq!(
785 original.line_index.total_lines,
786 loaded.line_index.total_lines
787 );
788 assert_eq!(original.diagnostics.len(), loaded.diagnostics.len());
789 }
790}