rust_docs_mcp/cache/
storage.rs

1use anyhow::{Context, Result, bail};
2use serde::{Deserialize, Serialize};
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use crate::cache::constants::*;
7use crate::cache::types::CrateIdentifier;
8use crate::cache::utils::copy_directory_contents;
9
/// Unified metadata for both crates and workspace members.
///
/// Stored as pretty-printed JSON by `CacheStorage::save_metadata_with_source`
/// and read back by `CacheStorage::load_metadata`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CacheMetadata {
    /// Crate name this entry was saved under.
    pub name: String,
    /// Crate version this entry was saved under.
    pub version: String,
    /// UTC timestamp recorded when the metadata was written.
    pub cached_at: chrono::DateTime<chrono::Utc>,
    /// Whether the documentation file existed when the metadata was written.
    pub doc_generated: bool,
    /// Size in bytes of the cached directory, measured when the metadata was written.
    pub size_bytes: u64,
    /// Where the crate came from; falls back to `default_source()` ("crates.io")
    /// when the field is missing from stored JSON.
    #[serde(default = "default_source")]
    pub source: String,
    /// Optional source location string; `None` when absent from stored JSON.
    #[serde(default)]
    pub source_path: Option<String>,

    // Member-specific fields (None for main crates)
    /// Workspace-member details; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub member_info: Option<MemberInfo>,
}
27
/// Identifying details of a workspace member, stored inside [`CacheMetadata`].
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MemberInfo {
    /// Original member path as provided (e.g., "crates/rmcp")
    pub original_path: String,
    /// Normalized path used for storage (e.g., "crates-rmcp")
    pub normalized_path: String,
    /// Package name from Cargo.toml
    pub package_name: String,
}
37
/// Fallback for `CacheMetadata::source` when stored JSON predates that field.
fn default_source() -> String {
    String::from("crates.io")
}
42
/// Manages the file system storage for cached crates and their documentation
#[derive(Debug, Clone)]
pub struct CacheStorage {
    /// Root directory under which all cached data is stored.
    cache_dir: PathBuf,
}
48
49impl CacheStorage {
50    /// Create a new cache storage instance
51    pub fn new(custom_cache_dir: Option<PathBuf>) -> Result<Self> {
52        let cache_dir = match custom_cache_dir {
53            Some(dir) => dir,
54            None => dirs::home_dir()
55                .context("Failed to get home directory")?
56                .join(CACHE_ROOT_DIR)
57                .join(CACHE_DIR),
58        };
59
60        fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
61
62        Ok(Self { cache_dir })
63    }
64
65    /// Get the cache directory path
66    pub fn cache_dir(&self) -> &Path {
67        &self.cache_dir
68    }
69
70    /// Get the path for a specific crate version
71    pub fn crate_path(&self, name: &str, version: &str) -> Result<PathBuf> {
72        let crate_id = CrateIdentifier::new(name, version)?;
73        Ok(self.crate_path_for_id(&crate_id))
74    }
75
76    /// Get the path for a specific crate using CrateIdentifier
77    pub fn crate_path_for_id(&self, crate_id: &CrateIdentifier) -> PathBuf {
78        self.cache_dir
79            .join(CRATES_DIR)
80            .join(crate_id.name())
81            .join(crate_id.version())
82    }
83
84    /// Get the path for a specific workspace member
85    pub fn member_path(&self, name: &str, version: &str, member_name: &str) -> Result<PathBuf> {
86        use crate::cache::member_utils::{normalize_member_path, validate_member_path};
87
88        // Validate the member path for security
89        validate_member_path(member_name)?;
90
91        // Normalize the path for storage
92        let normalized = normalize_member_path(member_name);
93
94        Ok(self
95            .crate_path(name, version)?
96            .join(MEMBERS_DIR)
97            .join(normalized))
98    }
99
100    /// Get the source directory path for a crate
101    pub fn source_path(&self, name: &str, version: &str) -> Result<PathBuf> {
102        Ok(self.crate_path(name, version)?.join(SOURCE_DIR))
103    }
104
105    /// Get the documentation JSON path for a crate or workspace member
106    pub fn docs_path(
107        &self,
108        name: &str,
109        version: &str,
110        member_name: Option<&str>,
111    ) -> Result<PathBuf> {
112        let base_path = if let Some(member) = member_name {
113            self.member_path(name, version, member)?
114        } else {
115            self.crate_path(name, version)?
116        };
117        Ok(base_path.join(DOCS_FILE))
118    }
119
120    /// Get the metadata path for a crate or workspace member
121    pub fn metadata_path(
122        &self,
123        name: &str,
124        version: &str,
125        member_name: Option<&str>,
126    ) -> Result<PathBuf> {
127        let base_path = if let Some(member) = member_name {
128            self.member_path(name, version, member)?
129        } else {
130            self.crate_path(name, version)?
131        };
132        Ok(base_path.join(METADATA_FILE))
133    }
134
135    /// Get the dependencies path for a crate or workspace member
136    pub fn dependencies_path(
137        &self,
138        name: &str,
139        version: &str,
140        member_name: Option<&str>,
141    ) -> Result<PathBuf> {
142        let base_path = if let Some(member) = member_name {
143            self.member_path(name, version, member)?
144        } else {
145            self.crate_path(name, version)?
146        };
147        Ok(base_path.join(DEPENDENCIES_FILE))
148    }
149
150    /// Get the search index path for a crate or workspace member
151    pub fn search_index_path(
152        &self,
153        name: &str,
154        version: &str,
155        member_name: Option<&str>,
156    ) -> Result<PathBuf> {
157        let base_path = if let Some(member) = member_name {
158            self.member_path(name, version, member)?
159        } else {
160            self.crate_path(name, version)?
161        };
162        Ok(base_path.join(SEARCH_INDEX_DIR))
163    }
164
165    /// Check if a crate version is cached
166    pub fn is_cached(&self, name: &str, version: &str) -> bool {
167        let result = self
168            .crate_path(name, version)
169            .map(|p| p.exists())
170            .unwrap_or(false);
171        tracing::debug!("is_cached({}-{}) = {}", name, version, result);
172        result
173    }
174
175    /// Check if a workspace member is cached
176    ///
177    /// Accepts full member paths (e.g., "crates/rmcp") which are normalized internally
178    pub fn is_member_cached(&self, name: &str, version: &str, member_path: &str) -> bool {
179        // member_path method handles validation and normalization
180        self.member_path(name, version, member_path)
181            .map(|p| p.exists())
182            .unwrap_or(false)
183    }
184
185    /// Check if documentation is generated for a crate or workspace member
186    pub fn has_docs(&self, name: &str, version: &str, member_name: Option<&str>) -> bool {
187        self.docs_path(name, version, member_name)
188            .map(|p| p.exists())
189            .unwrap_or(false)
190    }
191
192    /// Check if a search index exists for a crate or workspace member
193    pub fn has_search_index(&self, name: &str, version: &str, member_name: Option<&str>) -> bool {
194        self.search_index_path(name, version, member_name)
195            .map(|p| p.exists())
196            .unwrap_or(false)
197    }
198
199    /// Ensure a directory exists
200    pub fn ensure_dir(&self, path: &Path) -> Result<()> {
201        fs::create_dir_all(path)
202            .with_context(|| format!("Failed to create directory: {}", path.display()))?;
203        Ok(())
204    }
205
206    /// Calculate the total size of a directory in bytes
207    #[allow(clippy::only_used_in_recursion)]
208    pub fn calculate_dir_size(&self, path: &Path) -> Result<u64> {
209        let mut total_size = 0u64;
210
211        if !path.exists() {
212            return Ok(0);
213        }
214
215        for entry in fs::read_dir(path)? {
216            let entry = entry?;
217            let metadata = entry.metadata()?;
218
219            if metadata.is_dir() {
220                total_size += self.calculate_dir_size(&entry.path())?;
221            } else {
222                total_size += metadata.len();
223            }
224        }
225
226        Ok(total_size)
227    }
228
229    /// Save metadata for a crate
230    pub fn save_metadata(&self, name: &str, version: &str) -> Result<()> {
231        self.save_metadata_with_source(name, version, "crates.io", None, None)
232    }
233
234    /// Save metadata for a crate with source information
235    pub fn save_metadata_with_source(
236        &self,
237        name: &str,
238        version: &str,
239        source: &str,
240        source_path: Option<&str>,
241        member_info: Option<MemberInfo>,
242    ) -> Result<()> {
243        // Extract member path as owned string to avoid borrowing issues
244        let member_path_string = member_info.as_ref().map(|info| info.original_path.clone());
245        let member_path_str = member_path_string.as_deref();
246
247        let base_path = match &member_info {
248            Some(info) => self.member_path(name, version, &info.original_path)?,
249            None => self.crate_path(name, version)?,
250        };
251
252        let size_bytes = self.calculate_dir_size(&base_path)?;
253
254        let metadata = CacheMetadata {
255            name: name.to_string(),
256            version: version.to_string(),
257            cached_at: chrono::Utc::now(),
258            doc_generated: self.has_docs(name, version, member_path_str),
259            size_bytes,
260            source: source.to_string(),
261            source_path: source_path.map(String::from),
262            member_info,
263        };
264
265        let metadata_path = self.metadata_path(name, version, member_path_str)?;
266        let json = serde_json::to_string_pretty(&metadata)?;
267        fs::write(metadata_path, json)?;
268        Ok(())
269    }
270
271    /// Load metadata for a crate or workspace member
272    pub fn load_metadata(
273        &self,
274        name: &str,
275        version: &str,
276        member_name: Option<&str>,
277    ) -> Result<CacheMetadata> {
278        let metadata_path = self.metadata_path(name, version, member_name)?;
279        let json = fs::read_to_string(metadata_path)?;
280        let metadata: CacheMetadata = serde_json::from_str(&json)?;
281        Ok(metadata)
282    }
283
284    /// Get all cached crate versions
285    pub fn list_cached_crates(&self) -> Result<Vec<CacheMetadata>> {
286        let crates_dir = self.cache_dir.join(CRATES_DIR);
287        let mut cached_crates = Vec::new();
288
289        if !crates_dir.exists() {
290            return Ok(cached_crates);
291        }
292
293        for crate_entry in fs::read_dir(&crates_dir)? {
294            let crate_entry = crate_entry?;
295            let crate_name = crate_entry.file_name().to_string_lossy().to_string();
296
297            if crate_entry.file_type()?.is_dir() {
298                for version_entry in fs::read_dir(crate_entry.path())? {
299                    let version_entry = version_entry?;
300                    let version = version_entry.file_name().to_string_lossy().to_string();
301
302                    if version_entry.file_type()?.is_dir() {
303                        // Try to load metadata, fall back to creating new metadata if not found
304                        let metadata = match self.load_metadata(&crate_name, &version, None) {
305                            Ok(meta) => meta,
306                            Err(_) => {
307                                // If metadata doesn't exist, create it based on file modification time
308                                let cached_at = version_entry
309                                    .metadata()
310                                    .and_then(|m| m.modified())
311                                    .map(chrono::DateTime::<chrono::Utc>::from)
312                                    .unwrap_or_else(|_| chrono::Utc::now());
313
314                                CacheMetadata {
315                                    name: crate_name.clone(),
316                                    version: version.clone(),
317                                    cached_at,
318                                    doc_generated: self.has_docs(
319                                        &crate_name,
320                                        &version_entry.file_name().to_string_lossy(),
321                                        None,
322                                    ),
323                                    size_bytes: 0,
324                                    source: default_source(),
325                                    source_path: None,
326                                    member_info: None,
327                                }
328                            }
329                        };
330                        cached_crates.push(metadata);
331                    }
332                }
333            }
334        }
335
336        Ok(cached_crates)
337    }
338
339    /// Get all workspace members for a cached crate
340    pub fn list_workspace_members(&self, name: &str, version: &str) -> Result<Vec<String>> {
341        let members_dir = self.crate_path(name, version)?.join(MEMBERS_DIR);
342        let mut members = Vec::new();
343
344        if !members_dir.exists() {
345            return Ok(members);
346        }
347
348        for member_entry in fs::read_dir(&members_dir)? {
349            let member_entry = member_entry?;
350            if member_entry.file_type()?.is_dir() {
351                let normalized_name = member_entry.file_name().to_string_lossy().to_string();
352
353                // Load metadata to get original path
354                let metadata_path = member_entry.path().join(METADATA_FILE);
355                match fs::read_to_string(&metadata_path) {
356                    Ok(content) => match serde_json::from_str::<CacheMetadata>(&content) {
357                        Ok(metadata) => {
358                            if let Some(member_info) = metadata.member_info {
359                                members.push(member_info.original_path);
360                                continue;
361                            }
362                        }
363                        Err(e) => {
364                            tracing::warn!(
365                                "Failed to parse member metadata for {}: {}",
366                                normalized_name,
367                                e
368                            );
369                        }
370                    },
371                    Err(e) => {
372                        tracing::warn!(
373                            "Failed to read member metadata for {}: {}",
374                            normalized_name,
375                            e
376                        );
377                    }
378                }
379
380                // This shouldn't happen with proper metadata
381                tracing::error!(
382                    "Member {} missing proper metadata in {}-{}",
383                    normalized_name,
384                    name,
385                    version
386                );
387            }
388        }
389
390        Ok(members)
391    }
392
393    /// Remove a cached crate version
394    pub fn remove_crate(&self, name: &str, version: &str) -> Result<()> {
395        let path = self.crate_path(name, version)?;
396        if path.exists() {
397            fs::remove_dir_all(&path)
398                .with_context(|| format!("Failed to remove crate cache: {name}/{version}"))?;
399        }
400        Ok(())
401    }
402
403    /// Copy a crate to a temporary backup location
404    pub fn backup_crate_to_temp(&self, name: &str, version: &str) -> Result<PathBuf> {
405        let source = self.crate_path(name, version)?;
406        if !source.exists() {
407            bail!("Crate {name}-{version} not found in cache");
408        }
409
410        let temp_dir = std::env::temp_dir().join(BACKUP_DIR_PREFIX).join(format!(
411            "{name}-{version}-{}-{}",
412            chrono::Utc::now()
413                .timestamp_nanos_opt()
414                .unwrap_or_else(|| chrono::Utc::now().timestamp_micros()),
415            std::process::id()
416        ));
417
418        self.ensure_dir(&temp_dir)?;
419        copy_directory_contents(&source, &temp_dir)
420            .with_context(|| format!("Failed to backup crate {name}-{version}"))?;
421
422        Ok(temp_dir)
423    }
424
425    /// Restore a crate from temporary backup
426    pub fn restore_crate_from_backup(
427        &self,
428        name: &str,
429        version: &str,
430        backup_path: &Path,
431    ) -> Result<()> {
432        if !backup_path.exists() {
433            bail!("Backup path does not exist: {}", backup_path.display());
434        }
435
436        let target = self.crate_path(name, version)?;
437
438        // Remove current version if it exists
439        if target.exists() {
440            fs::remove_dir_all(&target)
441                .with_context(|| "Failed to remove existing crate before restore".to_string())?;
442        }
443
444        // Ensure parent directory exists
445        if let Some(parent) = target.parent() {
446            self.ensure_dir(parent)?;
447        }
448
449        // Create the target directory first
450        self.ensure_dir(&target)?;
451
452        // Restore from backup
453        copy_directory_contents(backup_path, &target)
454            .with_context(|| format!("Failed to restore crate {name}-{version} from backup"))?;
455
456        Ok(())
457    }
458
459    /// Clean up temporary backup
460    pub fn cleanup_backup(&self, backup_path: &Path) -> Result<()> {
461        if backup_path.exists() {
462            fs::remove_dir_all(backup_path).with_context(|| {
463                format!("Failed to cleanup backup at {}", backup_path.display())
464            })?;
465        }
466        Ok(())
467    }
468}
469
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a storage rooted in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned as well so the directory lives for the
    /// whole test body.
    fn temp_storage() -> (TempDir, CacheStorage) {
        let temp_dir = TempDir::new().unwrap();
        let storage = CacheStorage::new(Some(temp_dir.path().to_path_buf())).unwrap();
        (temp_dir, storage)
    }

    #[test]
    fn test_crate_path_validation() {
        let (_guard, storage) = temp_storage();

        // Path traversal attempts and path separators must be rejected.
        let rejected = [
            "../../../etc/passwd",
            "crate/../../../etc",
            "..",
            "crate/subcrate",
            "crate\\subcrate",
            "/absolute/path",
        ];
        for name in rejected {
            assert!(
                storage.crate_path(name, "1.0.0").is_err(),
                "crate name {name:?} should be rejected"
            );
        }

        // Ordinary crate names must be accepted.
        for name in ["valid-crate", "valid_crate"] {
            assert!(
                storage.crate_path(name, "1.0.0").is_ok(),
                "crate name {name:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_member_path_validation() {
        let (_guard, storage) = temp_storage();

        // Traversal, absolute and drive-prefixed member paths must be rejected.
        let rejected = [
            "../../../etc",
            "member/../../other",
            "..",
            "/absolute",
            "C:\\windows",
        ];
        for member in rejected {
            assert!(
                storage.member_path("valid-crate", "1.0.0", member).is_err(),
                "member path {member:?} should be rejected"
            );
        }

        // Plain member names must be accepted.
        for member in ["rmcp", "rmcp-macros", "my_member"] {
            assert!(
                storage.member_path("valid-crate", "1.0.0", member).is_ok(),
                "member path {member:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_all_path_methods_validate() {
        let (_guard, storage) = temp_storage();

        let malicious_name = "../../../etc/passwd";
        let version = "1.0.0";

        // Every getter taking name/version must validate the crate name.
        let crate_level = [
            storage.crate_path(malicious_name, version),
            storage.source_path(malicious_name, version),
            storage.docs_path(malicious_name, version, None),
            storage.metadata_path(malicious_name, version, None),
            storage.dependencies_path(malicious_name, version, None),
            storage.search_index_path(malicious_name, version, None),
        ];
        for (index, outcome) in crate_level.iter().enumerate() {
            assert!(
                outcome.is_err(),
                "crate-level check #{index} accepted a malicious name"
            );
        }

        // Every getter taking a member must validate the member path.
        let malicious_member = "../../other";
        let member_level = [
            storage.member_path("valid", version, malicious_member),
            storage.docs_path("valid", version, Some(malicious_member)),
            storage.metadata_path("valid", version, Some(malicious_member)),
            storage.dependencies_path("valid", version, Some(malicious_member)),
            storage.search_index_path("valid", version, Some(malicious_member)),
        ];
        for (index, outcome) in member_level.iter().enumerate() {
            assert!(
                outcome.is_err(),
                "member-level check #{index} accepted a malicious member path"
            );
        }
    }
}