Skip to main content

thoughts_tool/config/
validation.rs

1use crate::git::ref_key::encode_ref_key;
2use crate::repo_identity::RepoIdentity;
3use crate::repo_identity::parse_url_and_subpath;
4use anyhow::Result;
5use anyhow::bail;
6use std::borrow::Cow;
7
/// Convert a mount name into a string that is safe to use as a directory name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept unchanged. Every other
/// character (including any non-ASCII character) is replaced by a single `_`,
/// so the output always has the same number of characters as the input.
pub fn sanitize_mount_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let allowed = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        sanitized.push(if allowed { ch } else { '_' });
    }
    sanitized
}
17
/// Report whether `s` looks like a git URL in one of the supported forms.
///
/// Surrounding whitespace is ignored. The check is a case-sensitive prefix
/// test against `git@`, `https://`, `http://` and `ssh://`; nothing beyond the
/// prefix is inspected.
pub fn is_git_url(s: &str) -> bool {
    const SUPPORTED_PREFIXES: [&str; 4] = ["git@", "https://", "http://", "ssh://"];

    let candidate = s.trim();
    SUPPORTED_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
}
26
27/// Extract host from SSH/HTTPS URLs
28pub fn get_host_from_url(url: &str) -> Result<String> {
29    let (base, _) = parse_url_and_subpath(url);
30    let id = RepoIdentity::parse(&base)
31        .map_err(|e| anyhow::anyhow!("Unsupported URL (cannot parse host): {url}\nDetails: {e}"))?;
32    Ok(id.host)
33}
34
35/// Validate that a reference URL is well-formed and points to org/repo (repo-level only)
36pub fn validate_reference_url(url: &str) -> Result<()> {
37    let url = url.trim();
38    if url.contains('?') || url.contains('#') {
39        bail!("Reference URLs cannot contain '?' or '#' alternate ref encodings: {url}");
40    }
41    let (base, subpath) = parse_url_and_subpath(url);
42    if subpath.is_some() {
43        bail!(
44            "Cannot add URL with subpath as a reference: {url}\n\n\
45             References are repo-level only.\n\
46             Try one of:\n\
47               - Add the repository URL without a subpath\n\
48               - Use 'thoughts mount add <local-subdir>' for subdirectory mounts"
49        );
50    }
51    if !is_git_url(&base) {
52        bail!(
53            "Invalid reference value: {url}\n\n\
54             Must be a git URL using one of:\n  - git@host:org/repo(.git)\n  - https://host/org/repo(.git)\n  - ssh://user@host[:port]/org/repo(.git)\n"
55        );
56    }
57    // Ensure org/repo structure is parseable via RepoIdentity
58    RepoIdentity::parse(&base).map_err(|e| {
59        anyhow::anyhow!(
60            "Invalid repository URL: {url}\n\n\
61             Expected a URL with an org and repo (e.g., github.com/org/repo).\n\
62             Details: {e}"
63        )
64    })?;
65    Ok(())
66}
67
68/// Canonical key (host, `org_path`, repo) all lowercased, without .git
69pub fn canonical_reference_key(url: &str) -> Result<(String, String, String)> {
70    let (base, _) = parse_url_and_subpath(url);
71    let key = RepoIdentity::parse(&base)?.canonical_key();
72    Ok((key.host, key.org_path, key.repo))
73}
74
/// Map a legacy remote-tracking ref name onto its `refs/heads/` equivalent.
///
/// `refs/remotes/<remote>/<branch>` becomes `refs/heads/<branch>` (the remote
/// segment is dropped; `<branch>` may itself contain `/`). Any other input,
/// including a `refs/remotes/` name with no branch part, is returned borrowed
/// and unchanged.
fn normalize_pinned_ref_name_for_identity(ref_name: &str) -> Cow<'_, str> {
    let remote_and_branch = ref_name
        .strip_prefix("refs/remotes/")
        .and_then(|rest| rest.split_once('/'));

    match remote_and_branch {
        Some((_remote, branch)) if !branch.is_empty() => {
            Cow::Owned(format!("refs/heads/{branch}"))
        }
        _ => Cow::Borrowed(ref_name),
    }
}
86
/// Map an encoded legacy remote-tracking ref key onto its encoded
/// `refs/heads/` equivalent.
///
/// Works on already-encoded keys, where `~2f` appears in place of `/`. A key
/// of the form `r-refs~2fremotes~2f<remote>~2f<branch>` becomes
/// `r-refs~2fheads~2f<branch>`. Any other key, including one with an empty
/// branch part, is returned borrowed and unchanged.
pub(crate) fn normalize_encoded_ref_key_for_identity(ref_key: &str) -> Cow<'_, str> {
    const REMOTES_PREFIX: &str = "r-refs~2fremotes~2f";
    const HEADS_PREFIX: &str = "r-refs~2fheads~2f";

    let legacy_branch = ref_key
        .strip_prefix(REMOTES_PREFIX)
        .and_then(|after_prefix| after_prefix.split_once("~2f"))
        .map(|(_remote_enc, branch_enc)| branch_enc)
        .filter(|branch_enc| !branch_enc.is_empty());

    match legacy_branch {
        Some(branch_enc) => Cow::Owned(format!("{HEADS_PREFIX}{branch_enc}")),
        None => Cow::Borrowed(ref_key),
    }
}
100
101pub fn canonical_reference_instance_key(
102    url: &str,
103    ref_name: Option<&str>,
104) -> Result<(String, String, String, Option<String>)> {
105    let (host, org_path, repo) = canonical_reference_key(url)?;
106    let ref_key = ref_name
107        .map(normalize_pinned_ref_name_for_identity)
108        .map(|name| encode_ref_key(name.as_ref()))
109        .transpose()?;
110    Ok((host, org_path, repo, ref_key))
111}
112
113pub fn validate_pinned_ref_full_name(ref_name: &str) -> Result<()> {
114    let trimmed = ref_name.trim();
115    if trimmed.is_empty() {
116        bail!("ref cannot be empty");
117    }
118    if trimmed != ref_name {
119        bail!("Pinned ref must not contain leading/trailing whitespace");
120    }
121    if trimmed.ends_with('/') {
122        bail!("Pinned ref cannot end with '/'");
123    }
124    let ref_name = trimmed;
125
126    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
127        if rest.is_empty() {
128            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
129        }
130        return Ok(());
131    }
132
133    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
134        if rest.is_empty() {
135            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
136        }
137        return Ok(());
138    }
139
140    if let Some(rest) = ref_name.strip_prefix("refs/remotes/") {
141        let mut parts = rest.splitn(2, '/');
142        let remote = parts.next().unwrap_or("");
143        let branch = parts.next().unwrap_or("");
144        if remote.is_empty() || branch.is_empty() {
145            bail!("Legacy pinned ref must be 'refs/remotes/<remote>/<branch>' (got '{ref_name}')");
146        }
147        return Ok(());
148    }
149
150    bail!(
151        "Pinned refs must be full ref names starting with 'refs/heads/', 'refs/tags/', or 'refs/remotes/' (got '{ref_name}')"
152    );
153}
154
155pub fn validate_pinned_ref_full_name_new_input(ref_name: &str) -> Result<()> {
156    let trimmed = ref_name.trim();
157    if trimmed.is_empty() {
158        bail!("ref cannot be empty");
159    }
160    if trimmed != ref_name {
161        bail!("Pinned ref must not contain leading/trailing whitespace");
162    }
163    if trimmed.ends_with('/') {
164        bail!("Pinned ref cannot end with '/'");
165    }
166    let ref_name = trimmed;
167
168    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
169        if rest.is_empty() {
170            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
171        }
172        return Ok(());
173    }
174
175    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
176        if rest.is_empty() {
177            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
178        }
179        return Ok(());
180    }
181
182    bail!(
183        "Pinned refs must be full ref names starting with 'refs/heads/' or 'refs/tags/' (got '{ref_name}')"
184    );
185}
186
187// --- MCP HTTPS-only validation helpers ---
188
/// Report whether `s` uses one of the SSH forms that MCP does not support.
///
/// Surrounding whitespace is ignored. The check is a case-sensitive prefix
/// test against `git@` and `ssh://`.
pub fn is_ssh_url(s: &str) -> bool {
    let candidate = s.trim();
    ["git@", "ssh://"]
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
}
194
/// Report whether `s` starts with the `https://` scheme.
///
/// Leading whitespace is skipped and the scheme is compared ASCII
/// case-insensitively, so `HTTPS://host/...` is accepted as well.
///
/// Only the scheme-length prefix is examined, so no lowercased copy of the
/// input is allocated.
pub fn is_https_url(s: &str) -> bool {
    const SCHEME: &str = "https://";

    // `get` yields `None` when the input is shorter than the scheme or the cut
    // would land inside a multi-byte character; neither can be a match.
    s.trim_start()
        .get(..SCHEME.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(SCHEME))
}
199
200/// Validate MCP `add_reference` input:
201/// - Reject SSH and http://
202/// - Reject subpaths
203/// - Accept GitHub web or clone URLs (<https://github.com/org/repo>[.git])
204/// - Accept generic https://*.git clone URLs
205pub fn validate_reference_url_https_only(url: &str) -> Result<()> {
206    let url = url.trim();
207
208    if url.contains('?') || url.contains('#') {
209        bail!("Reference URLs cannot contain '?' or '#' alternate ref encodings: {url}");
210    }
211
212    // Reject subpaths (URL:subpath)
213    let (base, subpath) = parse_url_and_subpath(url);
214    if subpath.is_some() {
215        bail!(
216            "Cannot add URL with subpath as a reference: {url}\n\nReferences are repo-level only."
217        );
218    }
219
220    if is_ssh_url(&base) {
221        bail!(
222            "SSH URLs are not supported by the MCP add_reference tool: {base}\n\n\
223             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)\n\n\
224             If you must use SSH, run the CLI instead:\n  thoughts references add <git@... or ssh://...>"
225        );
226    }
227    if !is_https_url(&base) {
228        bail!(
229            "Only HTTPS URLs are supported by the MCP add_reference tool: {base}\n\n\
230             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)"
231        );
232    }
233
234    // Parse as RepoIdentity to validate structure
235    let id = RepoIdentity::parse(&base).map_err(|e| {
236        anyhow::anyhow!("Invalid repository URL (expected host/org/repo).\nDetails: {e}")
237    })?;
238
239    // For non-GitHub hosts, require .git suffix
240    let has_git_suffix = std::path::Path::new(&base)
241        .extension()
242        .is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
243    if id.host != "github.com" && !has_git_suffix {
244        bail!(
245            "For non-GitHub hosts, please provide an HTTPS clone URL ending with .git:\n  {base}"
246        );
247    }
248
249    Ok(())
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Allowed characters pass through; everything else becomes `_`.
    #[test]
    fn test_sanitize_mount_name() {
        let cases = [
            ("valid-name_123", "valid-name_123"),
            ("bad name!@#", "bad_name___"),
            ("CamelCase", "CamelCase"),
        ];

        for (input, expected) in cases {
            assert_eq!(sanitize_mount_name(input), expected, "input: {input:?}");
        }
    }
}
263
#[cfg(test)]
mod ref_validation_tests {
    use super::*;

    /// Supported URL forms are recognised; bare paths are not.
    #[test]
    fn test_is_git_url() {
        let git_urls = [
            "git@github.com:org/repo.git",
            "https://github.com/org/repo",
            "ssh://user@host:22/org/repo",
            "http://gitlab.com/org/repo",
        ];
        for url in git_urls {
            assert!(is_git_url(url), "expected a git URL: {url}");
        }

        for not_a_url in ["org/repo", "/local/path"] {
            assert!(!is_git_url(not_a_url), "expected a non-URL: {not_a_url}");
        }
    }

    /// Plain SSH and HTTPS repository URLs validate successfully.
    #[test]
    fn test_validate_reference_url_accepts_valid() {
        for url in ["git@github.com:org/repo.git", "https://github.com/org/repo"] {
            assert!(validate_reference_url(url).is_ok(), "should accept: {url}");
        }
    }

    /// A `URL:subpath` value is not a valid reference.
    #[test]
    fn test_validate_reference_url_rejects_subpath() {
        let with_subpath = "git@github.com:org/repo.git:docs";
        assert!(validate_reference_url(with_subpath).is_err());
    }

    /// SSH and HTTPS spellings of the same repository share one canonical key.
    #[test]
    fn test_canonical_reference_key_normalizes() {
        let from_ssh = canonical_reference_key("git@github.com:User/Repo.git").unwrap();
        let from_https = canonical_reference_key("https://github.com/user/repo").unwrap();
        let expected = (
            String::from("github.com"),
            String::from("user"),
            String::from("repo"),
        );

        assert_eq!(from_ssh, from_https);
        assert_eq!(from_ssh, expected);
    }

    /// Branch, tag and unpinned instances of one repository get distinct keys.
    #[test]
    fn test_canonical_reference_instance_key_distinguishes_refs() {
        let key_for = |ref_name: Option<&str>| {
            canonical_reference_instance_key("https://github.com/user/repo", ref_name).unwrap()
        };

        let main = key_for(Some("refs/heads/main"));
        let tag = key_for(Some("refs/tags/v1.0.0"));
        let unpinned = key_for(None);

        assert_ne!(main, tag);
        assert_ne!(main, unpinned);
        assert_ne!(tag, unpinned);
    }

    /// A legacy `refs/remotes/origin/<branch>` pin maps to the `refs/heads/` key.
    #[test]
    fn test_canonical_reference_instance_key_normalizes_legacy_refs_remotes_to_heads() {
        let key_for = |ref_name: &str| {
            canonical_reference_instance_key("https://github.com/org/repo", Some(ref_name))
                .unwrap()
        };

        assert_eq!(key_for("refs/remotes/origin/main"), key_for("refs/heads/main"));
    }

    /// The encoded-key normalizer collapses legacy remotes the same way.
    #[test]
    fn test_normalize_encoded_ref_key_for_identity_collapses_legacy_remotes() {
        let legacy = encode_ref_key("refs/remotes/origin/main").unwrap();
        let canonical = encode_ref_key("refs/heads/main").unwrap();

        let normalized = normalize_encoded_ref_key_for_identity(&legacy);
        assert_eq!(normalized.as_ref(), canonical);
    }

    /// Query strings and fragments are rejected.
    #[test]
    fn test_validate_reference_url_rejects_query_and_fragment() {
        let rejected = [
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ];
        for url in rejected {
            assert!(validate_reference_url(url).is_err(), "should reject: {url}");
        }
    }
}
350
#[cfg(test)]
mod mcp_https_validation_tests {
    use super::*;

    /// Asserts that every URL in `urls` passes HTTPS-only validation.
    fn assert_all_accepted(urls: &[&str]) {
        for url in urls {
            assert!(
                validate_reference_url_https_only(url).is_ok(),
                "should accept: {url}"
            );
        }
    }

    /// Asserts that every URL in `urls` fails HTTPS-only validation.
    fn assert_all_rejected(urls: &[&str]) {
        for url in urls {
            assert!(
                validate_reference_url_https_only(url).is_err(),
                "should reject: {url}"
            );
        }
    }

    #[test]
    fn test_https_only_accepts_github_web_and_clone() {
        assert_all_accepted(&[
            "https://github.com/org/repo",
            "https://github.com/org/repo.git",
        ]);
    }

    #[test]
    fn test_https_only_accepts_generic_dot_git() {
        assert_all_accepted(&["https://gitlab.com/group/proj.git"]);
    }

    #[test]
    fn test_https_only_rejects_ssh_and_http_and_subpath() {
        assert_all_rejected(&[
            "git@github.com:org/repo.git",
            "ssh://host/org/repo.git",
            "http://github.com/org/repo.git",
            "https://github.com/org/repo.git:docs",
        ]);
    }

    #[test]
    fn test_is_ssh_url_helper() {
        for ssh in ["git@github.com:org/repo.git", "ssh://user@host/repo.git"] {
            assert!(is_ssh_url(ssh), "expected SSH: {ssh}");
        }
        for not_ssh in ["https://github.com/org/repo", "http://github.com/org/repo"] {
            assert!(!is_ssh_url(not_ssh), "expected non-SSH: {not_ssh}");
        }
    }

    #[test]
    fn test_is_https_url_helper() {
        // The second entry checks that the scheme match is case-insensitive.
        for https in ["https://github.com/org/repo", "HTTPS://github.com/org/repo"] {
            assert!(is_https_url(https), "expected HTTPS: {https}");
        }
        for not_https in ["http://github.com/org/repo", "git@github.com:org/repo"] {
            assert!(!is_https_url(not_https), "expected non-HTTPS: {not_https}");
        }
    }

    #[test]
    fn test_https_only_rejects_non_github_without_dot_git() {
        // Non-GitHub hosts must supply a clone URL ending in `.git`.
        assert_all_rejected(&["https://gitlab.com/group/proj"]);
    }

    #[test]
    fn test_https_only_rejects_query_and_fragment() {
        assert_all_rejected(&[
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ]);
    }
}
402
#[cfg(test)]
mod pinned_ref_name_tests {
    use super::validate_pinned_ref_full_name;

    /// Asserts that every name in `names` is accepted.
    fn assert_all_valid(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name(name).is_ok(),
                "should accept: {name:?}"
            );
        }
    }

    /// Asserts that every name in `names` is rejected.
    fn assert_all_invalid(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name(name).is_err(),
                "should reject: {name:?}"
            );
        }
    }

    #[test]
    fn accepts_allowed_full_refs() {
        assert_all_valid(&[
            "refs/heads/main",
            "refs/tags/v1.0.0",
            "refs/remotes/origin/main",
        ]);
    }

    #[test]
    fn rejects_shorthand_and_other_namespaces() {
        assert_all_invalid(&["main", "v1.0.0", "origin/main", "refs/pull/123/head"]);
    }

    #[test]
    fn rejects_incomplete_prefixes() {
        assert_all_invalid(&[
            "refs/heads/",
            "refs/tags/",
            "refs/remotes/",
            "refs/remotes/origin/",
        ]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        assert_all_invalid(&[" refs/heads/main", "refs/heads/main ", " refs/tags/v1.0.0 "]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        assert_all_invalid(&[
            "refs/heads/main/",
            "refs/tags/v1.0.0/",
            "refs/remotes/origin/main/",
        ]);
    }
}
444
#[cfg(test)]
mod pinned_ref_name_new_input_tests {
    use super::validate_pinned_ref_full_name_new_input;

    /// Asserts that every name in `names` is accepted as new input.
    fn assert_all_valid(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name_new_input(name).is_ok(),
                "should accept: {name:?}"
            );
        }
    }

    /// Asserts that every name in `names` is rejected as new input.
    fn assert_all_invalid(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name_new_input(name).is_err(),
                "should reject: {name:?}"
            );
        }
    }

    #[test]
    fn accepts_heads_and_tags_only() {
        assert_all_valid(&["refs/heads/main", "refs/tags/v1.0.0"]);
    }

    #[test]
    fn rejects_refs_remotes_and_shorthand() {
        // The legacy remote-tracking form is not accepted for new input.
        assert_all_invalid(&["refs/remotes/origin/main", "main", "refs/pull/123/head"]);
    }

    #[test]
    fn new_input_rejects_incomplete_prefixes() {
        assert_all_invalid(&["refs/heads/", "refs/tags/"]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        assert_all_invalid(&[" refs/heads/main", "refs/heads/main ", " refs/tags/v1.0.0 "]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        assert_all_invalid(&["refs/heads/main/", "refs/tags/v1.0.0/"]);
    }
}