Skip to main content

thoughts_tool/config/
repo_mapping_manager.rs

1use super::types::{RepoLocation, RepoMapping};
2use crate::repo_identity::{
3    RepoIdentity, RepoIdentityKey, parse_url_and_subpath as identity_parse_url_and_subpath,
4};
5use crate::utils::locks::FileLock;
6use crate::utils::paths::{self, sanitize_dir_name};
7use anyhow::{Context, Result, bail};
8use atomicwrites::{AllowOverwrite, AtomicFile};
9use std::io::Write;
10use std::path::{Component, Path, PathBuf};
11
/// Describes which lookup strategy produced a URL-to-mapping match.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UrlResolutionKind {
    /// The requested URL is itself a key in repos.json.
    Exact,
    /// No identical key was stored; a key with the same canonical identity
    /// (for example a different scheme or URL format) was matched instead.
    CanonicalFallback,
}
20
21/// Details about a resolved URL mapping.
22#[derive(Debug, Clone)]
23pub struct ResolvedUrl {
24    /// The key in repos.json that matched
25    pub matched_url: String,
26    /// How the match was found
27    pub resolution: UrlResolutionKind,
28    /// The location details (cloned)
29    pub location: RepoLocation,
30}
31
/// Reads and writes the URL-to-local-path mapping stored in repos.json.
///
/// The manager only remembers where the mapping file lives; every operation
/// re-reads the file from disk.
#[derive(Debug)]
pub struct RepoMappingManager {
    /// Location of the mapping file on disk.
    mapping_path: PathBuf,
}
35
36impl RepoMappingManager {
37    pub fn new() -> Result<Self> {
38        let mapping_path = paths::get_repo_mapping_path()?;
39        Ok(Self { mapping_path })
40    }
41
42    /// Get the lock file path for repos.json RMW operations.
43    fn lock_path(&self) -> PathBuf {
44        let name = self
45            .mapping_path
46            .file_name()
47            .unwrap_or_default()
48            .to_string_lossy();
49        self.mapping_path.with_file_name(format!("{name}.lock"))
50    }
51
52    pub fn load(&self) -> Result<RepoMapping> {
53        if !self.mapping_path.exists() {
54            // First time - create empty mapping
55            let default = RepoMapping::default();
56            self.save(&default)?;
57            return Ok(default);
58        }
59
60        let contents = std::fs::read_to_string(&self.mapping_path)
61            .context("Failed to read repository mapping file")?;
62        let mapping: RepoMapping =
63            serde_json::from_str(&contents).context("Failed to parse repository mapping")?;
64        Ok(mapping)
65    }
66
67    pub fn save(&self, mapping: &RepoMapping) -> Result<()> {
68        // Ensure directory exists
69        if let Some(parent) = self.mapping_path.parent() {
70            paths::ensure_dir(parent)?;
71        }
72
73        // Atomic write for safety
74        let json = serde_json::to_string_pretty(mapping)?;
75        let af = AtomicFile::new(&self.mapping_path, AllowOverwrite);
76        af.write(|f| f.write_all(json.as_bytes()))?;
77
78        Ok(())
79    }
80
81    /// Load the mapping while holding an exclusive lock.
82    ///
83    /// Returns the mapping and the lock guard. The lock is released when the
84    /// guard is dropped, so callers should hold it until after `save()`.
85    ///
86    /// Use this for read-modify-write operations to prevent concurrent updates
87    /// from losing changes.
88    pub fn load_locked(&self) -> Result<(RepoMapping, FileLock)> {
89        let lock = FileLock::lock_exclusive(self.lock_path())?;
90        let mapping = self.load()?;
91        Ok((mapping, lock))
92    }
93
94    /// Resolve a git URL with detailed resolution information.
95    ///
96    /// Returns the matched URL key, resolution kind, location, and optional subpath.
97    pub fn resolve_url_with_details(
98        &self,
99        url: &str,
100    ) -> Result<Option<(ResolvedUrl, Option<String>)>> {
101        let mapping = self.load()?; // read-only; atomic writes make this safe
102        let (base_url, subpath) = parse_url_and_subpath(url);
103
104        // Try exact match first
105        if let Some(loc) = mapping.mappings.get(&base_url) {
106            return Ok(Some((
107                ResolvedUrl {
108                    matched_url: base_url,
109                    resolution: UrlResolutionKind::Exact,
110                    location: loc.clone(),
111                },
112                subpath,
113            )));
114        }
115
116        // Canonical fallback: parse target URL and find a matching key
117        let target_key = match RepoIdentity::parse(&base_url) {
118            Ok(id) => id.canonical_key(),
119            Err(_) => return Ok(None),
120        };
121
122        let mut matches: Vec<(String, RepoLocation)> = mapping
123            .mappings
124            .iter()
125            .filter_map(|(k, v)| {
126                let (k_base, _) = parse_url_and_subpath(k);
127                let key = RepoIdentity::parse(&k_base).ok()?.canonical_key();
128                (key == target_key).then(|| (k.clone(), v.clone()))
129            })
130            .collect();
131
132        // Sort for deterministic selection
133        matches.sort_by(|a, b| a.0.cmp(&b.0));
134
135        if let Some((matched_url, location)) = matches.into_iter().next() {
136            return Ok(Some((
137                ResolvedUrl {
138                    matched_url,
139                    resolution: UrlResolutionKind::CanonicalFallback,
140                    location,
141                },
142                subpath,
143            )));
144        }
145
146        Ok(None)
147    }
148
149    /// Resolve a git URL to its local path.
150    ///
151    /// Uses exact match first, then falls back to canonical identity matching
152    /// to handle URL scheme variants (SSH vs HTTPS).
153    pub fn resolve_url(&self, url: &str) -> Result<Option<PathBuf>> {
154        if let Some((resolved, subpath)) = self.resolve_url_with_details(url)? {
155            let mut p = resolved.location.path.clone();
156            if let Some(ref sub) = subpath {
157                validate_subpath(sub)?;
158                p = p.join(sub);
159            }
160            return Ok(Some(p));
161        }
162        Ok(None)
163    }
164
165    /// Add a URL-to-path mapping with identity-based upsert.
166    ///
167    /// If a mapping with the same canonical identity already exists,
168    /// it will be replaced (preserving any existing last_sync time).
169    /// This prevents duplicate entries for SSH vs HTTPS variants.
170    pub fn add_mapping(&mut self, url: String, path: PathBuf, auto_managed: bool) -> Result<()> {
171        let _lock = FileLock::lock_exclusive(self.lock_path())?;
172        let mut mapping = self.load()?; // safe under lock for RMW
173
174        // Basic validation
175        if !path.exists() {
176            bail!("Path does not exist: {}", path.display());
177        }
178
179        if !path.is_dir() {
180            bail!("Path is not a directory: {}", path.display());
181        }
182
183        let (base_url, _) = parse_url_and_subpath(&url);
184        let new_key = RepoIdentity::parse(&base_url)?.canonical_key();
185
186        // Find all existing entries with the same canonical identity
187        let matching_urls: Vec<String> = mapping
188            .mappings
189            .keys()
190            .filter_map(|k| {
191                let (k_base, _) = parse_url_and_subpath(k);
192                let key = RepoIdentity::parse(&k_base).ok()?.canonical_key();
193                (key == new_key).then(|| k.clone())
194            })
195            .collect();
196
197        // Preserve last_sync from any existing entry
198        let preserved_last_sync = matching_urls
199            .iter()
200            .filter_map(|k| mapping.mappings.get(k).and_then(|loc| loc.last_sync))
201            .max();
202
203        // Remove all matching entries
204        for k in matching_urls {
205            mapping.mappings.remove(&k);
206        }
207
208        // Insert the new mapping
209        mapping.mappings.insert(
210            base_url,
211            RepoLocation {
212                path,
213                auto_managed,
214                last_sync: preserved_last_sync,
215            },
216        );
217
218        self.save(&mapping)?;
219        Ok(())
220    }
221
222    /// Remove a URL mapping
223    #[allow(dead_code)]
224    // TODO(2): Add "thoughts mount unmap" command for cleanup
225    pub fn remove_mapping(&mut self, url: &str) -> Result<()> {
226        let _lock = FileLock::lock_exclusive(self.lock_path())?;
227        let mut mapping = self.load()?;
228        mapping.mappings.remove(url);
229        self.save(&mapping)?;
230        Ok(())
231    }
232
233    /// Check if a URL is auto-managed
234    pub fn is_auto_managed(&self, url: &str) -> Result<bool> {
235        let mapping = self.load()?;
236        Ok(mapping
237            .mappings
238            .get(url)
239            .map(|loc| loc.auto_managed)
240            .unwrap_or(false))
241    }
242
243    /// Get default clone path for a URL using hierarchical layout.
244    ///
245    /// Returns `~/.thoughts/clones/{host}/{org_path}/{repo}` with sanitized directory names.
246    pub fn get_default_clone_path(url: &str) -> Result<PathBuf> {
247        let home = dirs::home_dir()
248            .ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))?;
249
250        let (base_url, _sub) = parse_url_and_subpath(url);
251        let id = RepoIdentity::parse(&base_url)?;
252        let key = id.canonical_key(); // use canonical for stable paths across case/scheme
253
254        let mut p = home
255            .join(".thoughts")
256            .join("clones")
257            .join(sanitize_dir_name(&key.host));
258        for seg in key.org_path.split('/') {
259            if !seg.is_empty() {
260                p = p.join(sanitize_dir_name(seg));
261            }
262        }
263        p = p.join(sanitize_dir_name(&key.repo));
264        Ok(p)
265    }
266
267    /// Update last sync time for a URL.
268    ///
269    /// Uses canonical fallback to update the correct entry even if the URL
270    /// scheme differs from what's stored.
271    pub fn update_sync_time(&mut self, url: &str) -> Result<()> {
272        let _lock = FileLock::lock_exclusive(self.lock_path())?;
273        let mut mapping = self.load()?;
274        let now = chrono::Utc::now();
275
276        // Prefer exact base_url key
277        let (base_url, _) = parse_url_and_subpath(url);
278        if let Some(loc) = mapping.mappings.get_mut(&base_url) {
279            loc.last_sync = Some(now);
280            self.save(&mapping)?;
281            return Ok(());
282        }
283
284        // Fall back to canonical match
285        // Note: We need to find the matching key without holding a mutable borrow
286        let target_key = RepoIdentity::parse(&base_url)?.canonical_key();
287
288        // TODO(2): If repos.json contains multiple entries with the same canonical identity (legacy
289        // duplicates), the selection below is nondeterministic due to HashMap iteration order.
290        // Consider sorting (as in `resolve_url_with_details`) or updating all matches.
291        let matched_key: Option<String> = mapping
292            .mappings
293            .keys()
294            .filter_map(|k| {
295                let (k_base, _) = parse_url_and_subpath(k);
296                let key = RepoIdentity::parse(&k_base).ok()?.canonical_key();
297                (key == target_key).then(|| k.clone())
298            })
299            .next();
300
301        if let Some(key) = matched_key
302            && let Some(loc) = mapping.mappings.get_mut(&key)
303        {
304            loc.last_sync = Some(now);
305            self.save(&mapping)?;
306        }
307
308        Ok(())
309    }
310
311    /// Get the canonical identity key for a URL, if parseable.
312    pub fn get_canonical_key(url: &str) -> Option<RepoIdentityKey> {
313        let (base, _) = parse_url_and_subpath(url);
314        RepoIdentity::parse(&base).ok().map(|id| id.canonical_key())
315    }
316}
317
318/// Parse a URL into (base_url, optional_subpath).
319///
320/// Delegates to the repo_identity module for robust port-aware parsing.
321pub fn parse_url_and_subpath(url: &str) -> (String, Option<String>) {
322    identity_parse_url_and_subpath(url)
323}
324
325/// Validate a subpath to prevent directory traversal attacks.
326///
327/// Rejects absolute paths and paths containing ".." components that could
328/// escape the repository root directory.
329fn validate_subpath(subpath: &str) -> Result<()> {
330    let path = Path::new(subpath);
331    if path.is_absolute() {
332        bail!(
333            "Invalid subpath (must be relative and not contain '..'): {}",
334            subpath
335        );
336    }
337    for component in path.components() {
338        match component {
339            Component::ParentDir => {
340                bail!(
341                    "Invalid subpath (must be relative and not contain '..'): {}",
342                    subpath
343                );
344            }
345            Component::Prefix(_) => {
346                bail!(
347                    "Invalid subpath (must be relative and not contain '..'): {}",
348                    subpath
349                );
350            }
351            _ => {}
352        }
353    }
354    Ok(())
355}
356
357pub fn extract_repo_name_from_url(url: &str) -> Result<String> {
358    let url = url.trim_end_matches(".git");
359
360    // Handle different URL formats
361    if let Some(pos) = url.rfind('/') {
362        Ok(url[pos + 1..].to_string())
363    } else if let Some(pos) = url.rfind(':') {
364        // SSH format like git@github.com:user/repo
365        if let Some(slash_pos) = url[pos + 1..].rfind('/') {
366            Ok(url[pos + 1 + slash_pos + 1..].to_string())
367        } else {
368            Ok(url[pos + 1..].to_string())
369        }
370    } else {
371        bail!("Cannot extract repository name from URL: {}", url)
372    }
373}
374
375/// Extract org_path and repo from a URL.
376///
377/// Delegates to RepoIdentity for robust parsing that handles:
378/// - SSH with ports: `ssh://git@host:2222/org/repo.git`
379/// - GitLab subgroups: `https://gitlab.com/a/b/c/repo.git`
380/// - Azure DevOps: `https://dev.azure.com/org/proj/_git/repo`
381pub fn extract_org_repo_from_url(url: &str) -> anyhow::Result<(String, String)> {
382    let (base, _) = parse_url_and_subpath(url);
383    let id = RepoIdentity::parse(&base)?;
384    Ok((id.org_path, id.repo))
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_url_and_subpath() {
        // (input, expected base URL, expected subpath)
        let cases = [
            (
                "git@github.com:user/repo.git",
                "git@github.com:user/repo.git",
                None,
            ),
            (
                "git@github.com:user/repo.git:docs/api",
                "git@github.com:user/repo.git",
                Some("docs/api"),
            ),
            (
                "https://github.com/user/repo",
                "https://github.com/user/repo",
                None,
            ),
        ];

        for (input, expected_base, expected_sub) in cases {
            let (base, sub) = parse_url_and_subpath(input);
            assert_eq!(base, expected_base, "base URL for {input}");
            assert_eq!(sub.as_deref(), expected_sub, "subpath for {input}");
        }
    }

    #[test]
    fn test_extract_repo_name() {
        let urls = [
            "git@github.com:user/repo.git",
            "https://github.com/user/repo",
            "git@github.com:user/repo",
        ];

        for url in urls {
            assert_eq!(
                extract_repo_name_from_url(url).unwrap(),
                "repo",
                "repo name for {url}"
            );
        }
    }

    #[test]
    fn test_extract_org_repo() {
        // (input, expected org path, expected repo)
        let cases = [
            ("git@github.com:user/repo.git", "user", "repo"),
            ("https://github.com/user/repo", "user", "repo"),
            ("git@github.com:user/repo", "user", "repo"),
            (
                "https://github.com/modelcontextprotocol/rust-sdk.git",
                "modelcontextprotocol",
                "rust-sdk",
            ),
        ];

        for (url, org, repo) in cases {
            assert_eq!(
                extract_org_repo_from_url(url).unwrap(),
                (org.to_string(), repo.to_string()),
                "org/repo for {url}"
            );
        }
    }

    #[test]
    fn test_default_clone_path_hierarchical() {
        // Hierarchical layout: ~/.thoughts/clones/{host}/{org}/{repo}
        let url = "git@github.com:org/repo.git:docs";
        let clone_path = RepoMappingManager::get_default_clone_path(url).unwrap();
        let expected_tail = std::path::Path::new(".thoughts/clones/github.com/org/repo");
        assert!(clone_path.ends_with(expected_tail));
    }

    #[test]
    fn test_default_clone_path_gitlab_subgroups() {
        let url = "https://gitlab.com/group/subgroup/team/repo.git";
        let clone_path = RepoMappingManager::get_default_clone_path(url).unwrap();
        let expected_tail =
            std::path::Path::new(".thoughts/clones/gitlab.com/group/subgroup/team/repo");
        assert!(clone_path.ends_with(expected_tail));
    }

    #[test]
    fn test_default_clone_path_ssh_port() {
        let url = "ssh://git@myhost.example.com:2222/org/repo.git";
        let clone_path = RepoMappingManager::get_default_clone_path(url).unwrap();
        let expected_tail = std::path::Path::new(".thoughts/clones/myhost.example.com/org/repo");
        assert!(clone_path.ends_with(expected_tail));
    }

    #[test]
    fn test_canonical_key_consistency() {
        let from_ssh = RepoMappingManager::get_canonical_key("git@github.com:Org/Repo.git");
        let from_https = RepoMappingManager::get_canonical_key("https://github.com/org/repo");
        assert_eq!(
            from_ssh.unwrap(),
            from_https.unwrap(),
            "SSH and HTTPS should have same canonical key"
        );
    }

    // TODO(2): Add integration test for resolve_url_with_details canonical fallback path.
    // The test above verifies that keys match, but not that resolve_url_with_details
    // actually uses canonical matching to find mappings stored under different URL schemes.
    // Test should: 1) add mapping with SSH URL, 2) resolve with HTTPS URL,
    // 3) verify CanonicalFallback resolution kind is returned.

    #[test]
    fn test_validate_subpath_accepts_valid_paths() {
        // Plain relative paths must pass.
        for subpath in ["docs", "docs/api", "src/lib/utils", "a/b/c/d/e"] {
            assert!(validate_subpath(subpath).is_ok(), "should accept {subpath}");
        }
    }

    #[test]
    fn test_validate_subpath_rejects_parent_dir_traversal() {
        // Any `..` component must be refused.
        let traversals = [
            "..",
            "../etc",
            "docs/../..",
            "docs/../../etc",
            "a/b/c/../../../etc",
        ];
        for subpath in traversals {
            assert!(validate_subpath(subpath).is_err(), "should reject {subpath}");
        }
    }

    #[test]
    fn test_validate_subpath_rejects_absolute_paths() {
        // Absolute paths must be refused.
        for subpath in ["/etc", "/etc/passwd", "/home/user/.ssh"] {
            assert!(validate_subpath(subpath).is_err(), "should reject {subpath}");
        }
    }
}