thoughts_tool/config/
validation.rs

1use crate::config::repo_mapping_manager::{extract_org_repo_from_url, parse_url_and_subpath};
2use anyhow::{Result, bail};
3
/// Turn an arbitrary mount name into a string usable as a directory name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept unchanged. Every other
/// character (whitespace, punctuation, non-ASCII text, ...) is replaced by a
/// single `_`, so the result has exactly as many characters as the input.
pub fn sanitize_mount_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let allowed = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        sanitized.push(if allowed { ch } else { '_' });
    }
    sanitized
}
13
/// Report whether `s` looks like a git URL form this tool supports.
///
/// Surrounding whitespace is ignored. The check is a case-sensitive prefix
/// match against `git@`, `https://`, `http://` and `ssh://`; nothing beyond
/// the prefix is inspected.
pub fn is_git_url(s: &str) -> bool {
    const SUPPORTED_PREFIXES: [&str; 4] = ["git@", "https://", "http://", "ssh://"];

    let candidate = s.trim();
    SUPPORTED_PREFIXES
        .iter()
        .any(|&prefix| candidate.starts_with(prefix))
}
22
23/// Extract host from SSH/HTTPS URLs
24pub fn get_host_from_url(url: &str) -> Result<String> {
25    let (base, _) = parse_url_and_subpath(url);
26    let base = base.trim_end_matches(".git");
27
28    if let Some(at) = base.find('@')
29        && let Some(colon) = base[at..].find(':')
30    {
31        let host = &base[at + 1..at + colon];
32        return Ok(host.to_lowercase());
33    }
34    if let Some(scheme) = base.find("://") {
35        let rest = &base[scheme + 3..];
36        let host = rest
37            .split('/')
38            .next()
39            .ok_or_else(|| anyhow::anyhow!("No host"))?;
40        // Strip userinfo and port if present (e.g., user@host:2222)
41        let host = host.split('@').next_back().unwrap_or(host);
42        let host = host.split(':').next().unwrap_or(host);
43        return Ok(host.to_lowercase());
44    }
45    bail!("Unsupported URL (cannot parse host): {}", url)
46}
47
48/// Validate that a reference URL is well-formed and points to org/repo (repo-level only)
49pub fn validate_reference_url(url: &str) -> Result<()> {
50    let url = url.trim();
51    let (base, subpath) = parse_url_and_subpath(url);
52    if subpath.is_some() {
53        bail!(
54            "Cannot add URL with subpath as a reference: {}\n\n\
55             References are repo-level only.\n\
56             Try one of:\n\
57               - Add the repository URL without a subpath\n\
58               - Use 'thoughts mount add <local-subdir>' for subdirectory mounts",
59            url
60        );
61    }
62    if !is_git_url(&base) {
63        bail!(
64            "Invalid reference value: {}\n\n\
65             Must be a git URL using one of:\n  - git@host:org/repo(.git)\n  - https://host/org/repo(.git)\n  - ssh://user@host[:port]/org/repo(.git)\n",
66            url
67        );
68    }
69    // Ensure org/repo structure is parseable
70    extract_org_repo_from_url(&base).map_err(|e| {
71        anyhow::anyhow!(
72            "Invalid repository URL: {}\n\n\
73             Expected a URL with an org and repo (e.g., github.com/org/repo).\n\
74             Details: {}",
75            url,
76            e
77        )
78    })?;
79    Ok(())
80}
81
82/// Canonical key (host, org, repo) all lowercased, without .git
83pub fn canonical_reference_key(url: &str) -> Result<(String, String, String)> {
84    let (base, _) = parse_url_and_subpath(url);
85    let (org, repo) = extract_org_repo_from_url(&base)?;
86    let host = get_host_from_url(&base)?;
87    Ok((host.to_lowercase(), org.to_lowercase(), repo.to_lowercase()))
88}
89
90// --- MCP HTTPS-only validation helpers ---
91
/// Report whether `s` uses one of the SSH URL forms that MCP does not support.
///
/// Surrounding whitespace is ignored; the match is a case-sensitive prefix
/// check for `git@` (scp-like) or `ssh://`.
pub fn is_ssh_url(s: &str) -> bool {
    let candidate = s.trim();
    ["git@", "ssh://"]
        .iter()
        .any(|&prefix| candidate.starts_with(prefix))
}
97
/// Report whether `s` starts with `https://`, ignoring leading whitespace and
/// the case of the scheme.
///
/// Only the first eight bytes after trimming are inspected, so no lowercased
/// copy of the whole input is allocated.
pub fn is_https_url(s: &str) -> bool {
    const SCHEME: &str = "https://";

    // `get` returns `None` when the input is too short or when the cut would
    // fall inside a multi-byte character; neither case can match the scheme.
    s.trim_start()
        .get(..SCHEME.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(SCHEME))
}
102
103/// Validate MCP add_reference input:
104/// - Reject SSH and http://
105/// - Reject subpaths
106/// - Accept GitHub web or clone URLs (https://github.com/org/repo[.git])
107/// - Accept generic https://*.git clone URLs
108pub fn validate_reference_url_https_only(url: &str) -> Result<()> {
109    let url = url.trim();
110
111    // Reject subpaths (URL:subpath)
112    let (base, subpath) = parse_url_and_subpath(url);
113    if subpath.is_some() {
114        bail!(
115            "Cannot add URL with subpath as a reference: {}\n\nReferences are repo-level only.",
116            url
117        );
118    }
119
120    if is_ssh_url(&base) {
121        bail!(
122            "SSH URLs are not supported by the MCP add_reference tool: {}\n\n\
123             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)\n\n\
124             If you must use SSH, run the CLI instead:\n  thoughts references add <git@... or ssh://...>",
125            base
126        );
127    }
128    if !is_https_url(&base) {
129        bail!(
130            "Only HTTPS URLs are supported by the MCP add_reference tool: {}\n\n\
131             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)",
132            base
133        );
134    }
135
136    // Determine host and require either GitHub web/clone, or generic https://*.git
137    let host = get_host_from_url(&base)?;
138    if host == "github.com" {
139        // Ensure org/repo parseability
140        extract_org_repo_from_url(&base).map_err(|e| {
141            anyhow::anyhow!(
142                "Invalid GitHub URL. Expected https://github.com/<org>/<repo>[.git]\nDetails: {}",
143                e
144            )
145        })?;
146        Ok(())
147    } else {
148        // Generic host: must end with .git and parse as org/repo
149        if !base.ends_with(".git") {
150            bail!(
151                "For non-GitHub hosts, please provide an HTTPS clone URL ending with .git:\n  {}",
152                base
153            );
154        }
155        extract_org_repo_from_url(&base).map_err(|e| {
156            anyhow::anyhow!(
157                "Invalid repository URL (expected host/org/repo.git).\nDetails: {}",
158                e
159            )
160        })?;
161        Ok(())
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Allowed characters pass through; everything else becomes `_`.
    #[test]
    fn test_sanitize_mount_name() {
        let cases = [
            ("valid-name_123", "valid-name_123"),
            ("bad name!@#", "bad_name___"),
            ("CamelCase", "CamelCase"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_mount_name(input), expected);
        }
    }
}
176
#[cfg(test)]
mod ref_validation_tests {
    use super::*;

    /// Supported URL prefixes are recognized; bare paths are not.
    #[test]
    fn test_is_git_url() {
        let accepted = [
            "git@github.com:org/repo.git",
            "https://github.com/org/repo",
            "ssh://user@host:22/org/repo",
            "http://gitlab.com/org/repo",
        ];
        for url in accepted {
            assert!(is_git_url(url));
        }

        let rejected = ["org/repo", "/local/path"];
        for value in rejected {
            assert!(!is_git_url(value));
        }
    }

    /// Both scp-like SSH and HTTPS repository URLs validate.
    #[test]
    fn test_validate_reference_url_accepts_valid() {
        for url in ["git@github.com:org/repo.git", "https://github.com/org/repo"] {
            assert!(validate_reference_url(url).is_ok());
        }
    }

    /// A `URL:subpath` value is not a valid reference.
    #[test]
    fn test_validate_reference_url_rejects_subpath() {
        let outcome = validate_reference_url("git@github.com:org/repo.git:docs");
        assert!(outcome.is_err());
    }

    /// SSH and HTTPS spellings of the same repo share one lowercased key.
    #[test]
    fn test_canonical_reference_key_normalizes() {
        let from_ssh = canonical_reference_key("git@github.com:User/Repo.git").unwrap();
        let from_https = canonical_reference_key("https://github.com/user/repo").unwrap();
        let expected = (
            String::from("github.com"),
            String::from("user"),
            String::from("repo"),
        );

        assert_eq!(from_ssh, from_https);
        assert_eq!(from_ssh, expected);
    }
}
210
#[cfg(test)]
mod mcp_https_validation_tests {
    use super::*;

    /// GitHub URLs are accepted with or without the `.git` suffix.
    #[test]
    fn test_https_only_accepts_github_web_and_clone() {
        let accepted = [
            "https://github.com/org/repo",
            "https://github.com/org/repo.git",
        ];
        for url in accepted {
            assert!(validate_reference_url_https_only(url).is_ok());
        }
    }

    /// Non-GitHub hosts are accepted when given as a `.git` clone URL.
    #[test]
    fn test_https_only_accepts_generic_dot_git() {
        let outcome = validate_reference_url_https_only("https://gitlab.com/group/proj.git");
        assert!(outcome.is_ok());
    }

    /// SSH forms, plain HTTP, and `URL:subpath` inputs are all refused.
    #[test]
    fn test_https_only_rejects_ssh_and_http_and_subpath() {
        let rejected = [
            "git@github.com:org/repo.git",
            "ssh://host/org/repo.git",
            "http://github.com/org/repo.git",
            "https://github.com/org/repo.git:docs",
        ];
        for url in rejected {
            assert!(validate_reference_url_https_only(url).is_err());
        }
    }

    #[test]
    fn test_is_ssh_url_helper() {
        for url in ["git@github.com:org/repo.git", "ssh://user@host/repo.git"] {
            assert!(is_ssh_url(url));
        }
        for url in ["https://github.com/org/repo", "http://github.com/org/repo"] {
            assert!(!is_ssh_url(url));
        }
    }

    #[test]
    fn test_is_https_url_helper() {
        // The scheme comparison is case-insensitive.
        for url in ["https://github.com/org/repo", "HTTPS://github.com/org/repo"] {
            assert!(is_https_url(url));
        }
        for url in ["http://github.com/org/repo", "git@github.com:org/repo"] {
            assert!(!is_https_url(url));
        }
    }

    /// Non-GitHub without .git suffix should be rejected.
    #[test]
    fn test_https_only_rejects_non_github_without_dot_git() {
        let outcome = validate_reference_url_https_only("https://gitlab.com/group/proj");
        assert!(outcome.is_err());
    }
}
255}