Skip to main content

thoughts_tool/config/
validation.rs

1use crate::git::ref_key::encode_ref_key;
2use crate::repo_identity::RepoIdentity;
3use crate::repo_identity::parse_url_and_subpath;
4use anyhow::Result;
5use anyhow::bail;
6use std::borrow::Cow;
7
/// Turn an arbitrary mount name into a string that is safe to use as a directory name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept as-is. Every other character
/// (including any non-ASCII character) is replaced by a single `_`, so the output has
/// exactly as many characters as the input.
pub fn sanitize_mount_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
17
/// Report whether `s` starts with one of the git URL forms this tool accepts.
///
/// Surrounding whitespace is ignored. The recognized prefixes are `git@`, `https://`,
/// `http://` and `ssh://`. Matching is case-sensitive and only the prefix is inspected,
/// so a `true` result says nothing about the rest of the URL being well-formed.
pub fn is_git_url(s: &str) -> bool {
    const ACCEPTED_PREFIXES: [&str; 4] = ["git@", "https://", "http://", "ssh://"];

    let candidate = s.trim();
    ACCEPTED_PREFIXES
        .iter()
        .any(|&prefix| candidate.starts_with(prefix))
}
26
27/// Extract host from SSH/HTTPS URLs
28pub fn get_host_from_url(url: &str) -> Result<String> {
29    let (base, _) = parse_url_and_subpath(url);
30    let id = RepoIdentity::parse(&base).map_err(|e| {
31        anyhow::anyhow!(
32            "Unsupported URL (cannot parse host): {}\nDetails: {}",
33            url,
34            e
35        )
36    })?;
37    Ok(id.host)
38}
39
40/// Validate that a reference URL is well-formed and points to org/repo (repo-level only)
41pub fn validate_reference_url(url: &str) -> Result<()> {
42    let url = url.trim();
43    if url.contains('?') || url.contains('#') {
44        bail!(
45            "Reference URLs cannot contain '?' or '#' alternate ref encodings: {}",
46            url
47        );
48    }
49    let (base, subpath) = parse_url_and_subpath(url);
50    if subpath.is_some() {
51        bail!(
52            "Cannot add URL with subpath as a reference: {}\n\n\
53             References are repo-level only.\n\
54             Try one of:\n\
55               - Add the repository URL without a subpath\n\
56               - Use 'thoughts mount add <local-subdir>' for subdirectory mounts",
57            url
58        );
59    }
60    if !is_git_url(&base) {
61        bail!(
62            "Invalid reference value: {}\n\n\
63             Must be a git URL using one of:\n  - git@host:org/repo(.git)\n  - https://host/org/repo(.git)\n  - ssh://user@host[:port]/org/repo(.git)\n",
64            url
65        );
66    }
67    // Ensure org/repo structure is parseable via RepoIdentity
68    RepoIdentity::parse(&base).map_err(|e| {
69        anyhow::anyhow!(
70            "Invalid repository URL: {}\n\n\
71             Expected a URL with an org and repo (e.g., github.com/org/repo).\n\
72             Details: {}",
73            url,
74            e
75        )
76    })?;
77    Ok(())
78}
79
80/// Canonical key (host, org_path, repo) all lowercased, without .git
81pub fn canonical_reference_key(url: &str) -> Result<(String, String, String)> {
82    let (base, _) = parse_url_and_subpath(url);
83    let key = RepoIdentity::parse(&base)?.canonical_key();
84    Ok((key.host, key.org_path, key.repo))
85}
86
/// Map a legacy remote-tracking ref name onto its `refs/heads/` form for identity comparison.
///
/// `refs/remotes/<remote>/<branch>` becomes `refs/heads/<branch>`: the first path segment
/// after `refs/remotes/` is dropped, and `<branch>` may itself contain `/`. Every other
/// input, including a `refs/remotes/...` value with no `/` after the remote or with an
/// empty branch part, is returned borrowed and unchanged.
fn normalize_pinned_ref_name_for_identity(ref_name: &str) -> Cow<'_, str> {
    let legacy_branch = ref_name
        .strip_prefix("refs/remotes/")
        .and_then(|remainder| remainder.split_once('/'))
        .map(|(_remote, branch)| branch)
        .filter(|branch| !branch.is_empty());

    match legacy_branch {
        Some(branch) => Cow::Owned(format!("refs/heads/{branch}")),
        None => Cow::Borrowed(ref_name),
    }
}
98
/// Collapse an encoded legacy `refs/remotes/<remote>/<branch>` key onto the encoded
/// `refs/heads/<branch>` key, working purely on the encoded text.
///
/// The key must start with `r-refs~2fremotes~2f`; the first `~2f` after that prefix ends
/// the remote segment, and whatever follows is re-attached to `r-refs~2fheads~2f`.
/// Keys without that prefix, without a further `~2f`, or with nothing after it are
/// returned borrowed and unchanged.
///
/// NOTE(review): `~2f` is presumably how `encode_ref_key` encodes `/` — confirm against
/// the encoder if its format changes.
pub(crate) fn normalize_encoded_ref_key_for_identity(ref_key: &str) -> Cow<'_, str> {
    const REMOTES_PREFIX: &str = "r-refs~2fremotes~2f";
    const HEADS_PREFIX: &str = "r-refs~2fheads~2f";

    let remote_and_branch = ref_key
        .strip_prefix(REMOTES_PREFIX)
        .and_then(|after_prefix| after_prefix.split_once("~2f"));

    match remote_and_branch {
        Some((_remote_enc, branch_enc)) if !branch_enc.is_empty() => {
            Cow::Owned(format!("{HEADS_PREFIX}{branch_enc}"))
        }
        _ => Cow::Borrowed(ref_key),
    }
}
112
113pub fn canonical_reference_instance_key(
114    url: &str,
115    ref_name: Option<&str>,
116) -> Result<(String, String, String, Option<String>)> {
117    let (host, org_path, repo) = canonical_reference_key(url)?;
118    let ref_key = ref_name
119        .map(normalize_pinned_ref_name_for_identity)
120        .map(|name| encode_ref_key(name.as_ref()))
121        .transpose()?;
122    Ok((host, org_path, repo, ref_key))
123}
124
125pub fn validate_pinned_ref_full_name(ref_name: &str) -> Result<()> {
126    let trimmed = ref_name.trim();
127    if trimmed.is_empty() {
128        bail!("ref cannot be empty");
129    }
130    if trimmed != ref_name {
131        bail!("Pinned ref must not contain leading/trailing whitespace");
132    }
133    if trimmed.ends_with('/') {
134        bail!("Pinned ref cannot end with '/'");
135    }
136    let ref_name = trimmed;
137
138    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
139        if rest.is_empty() {
140            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
141        }
142        return Ok(());
143    }
144
145    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
146        if rest.is_empty() {
147            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
148        }
149        return Ok(());
150    }
151
152    if let Some(rest) = ref_name.strip_prefix("refs/remotes/") {
153        let mut parts = rest.splitn(2, '/');
154        let remote = parts.next().unwrap_or("");
155        let branch = parts.next().unwrap_or("");
156        if remote.is_empty() || branch.is_empty() {
157            bail!(
158                "Legacy pinned ref must be 'refs/remotes/<remote>/<branch>' (got '{}')",
159                ref_name
160            );
161        }
162        return Ok(());
163    }
164
165    bail!(
166        "Pinned refs must be full ref names starting with 'refs/heads/', 'refs/tags/', or 'refs/remotes/' (got '{}')",
167        ref_name
168    );
169}
170
171pub fn validate_pinned_ref_full_name_new_input(ref_name: &str) -> Result<()> {
172    let trimmed = ref_name.trim();
173    if trimmed.is_empty() {
174        bail!("ref cannot be empty");
175    }
176    if trimmed != ref_name {
177        bail!("Pinned ref must not contain leading/trailing whitespace");
178    }
179    if trimmed.ends_with('/') {
180        bail!("Pinned ref cannot end with '/'");
181    }
182    let ref_name = trimmed;
183
184    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
185        if rest.is_empty() {
186            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
187        }
188        return Ok(());
189    }
190
191    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
192        if rest.is_empty() {
193            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
194        }
195        return Ok(());
196    }
197
198    bail!(
199        "Pinned refs must be full ref names starting with 'refs/heads/' or 'refs/tags/' (got '{}')",
200        ref_name
201    );
202}
203
204// --- MCP HTTPS-only validation helpers ---
205
/// Report whether `s` is an SSH-style git URL, which the MCP tooling does not support.
///
/// After trimming surrounding whitespace, both the `git@...` form and the explicit
/// `ssh://...` scheme count as SSH. Matching is case-sensitive and prefix-only.
pub fn is_ssh_url(s: &str) -> bool {
    let candidate = s.trim();
    let scp_style = candidate.strip_prefix("git@").is_some();
    let ssh_scheme = candidate.strip_prefix("ssh://").is_some();
    scp_style || ssh_scheme
}
211
/// True if the URL starts with the `https://` scheme (ASCII case-insensitive).
///
/// Leading whitespace is ignored. Only the first eight bytes are inspected, so no
/// lowercase copy of the whole input is allocated. `str::get` returns `None` when the
/// input is shorter than the scheme or when byte 8 is not a character boundary, and in
/// both cases the input cannot start with the all-ASCII scheme.
pub fn is_https_url(s: &str) -> bool {
    const SCHEME: &str = "https://";

    s.trim_start()
        .get(..SCHEME.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(SCHEME))
}
216
217/// Validate MCP add_reference input:
218/// - Reject SSH and http://
219/// - Reject subpaths
220/// - Accept GitHub web or clone URLs (https://github.com/org/repo[.git])
221/// - Accept generic https://*.git clone URLs
222pub fn validate_reference_url_https_only(url: &str) -> Result<()> {
223    let url = url.trim();
224
225    if url.contains('?') || url.contains('#') {
226        bail!(
227            "Reference URLs cannot contain '?' or '#' alternate ref encodings: {}",
228            url
229        );
230    }
231
232    // Reject subpaths (URL:subpath)
233    let (base, subpath) = parse_url_and_subpath(url);
234    if subpath.is_some() {
235        bail!(
236            "Cannot add URL with subpath as a reference: {}\n\nReferences are repo-level only.",
237            url
238        );
239    }
240
241    if is_ssh_url(&base) {
242        bail!(
243            "SSH URLs are not supported by the MCP add_reference tool: {}\n\n\
244             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)\n\n\
245             If you must use SSH, run the CLI instead:\n  thoughts references add <git@... or ssh://...>",
246            base
247        );
248    }
249    if !is_https_url(&base) {
250        bail!(
251            "Only HTTPS URLs are supported by the MCP add_reference tool: {}\n\n\
252             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)",
253            base
254        );
255    }
256
257    // Parse as RepoIdentity to validate structure
258    let id = RepoIdentity::parse(&base).map_err(|e| {
259        anyhow::anyhow!(
260            "Invalid repository URL (expected host/org/repo).\nDetails: {}",
261            e
262        )
263    })?;
264
265    // For non-GitHub hosts, require .git suffix
266    if id.host != "github.com" && !base.ends_with(".git") {
267        bail!(
268            "For non-GitHub hosts, please provide an HTTPS clone URL ending with .git:\n  {}",
269            base
270        );
271    }
272
273    Ok(())
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Allowed characters pass through untouched; everything else becomes `_`.
    #[test]
    fn test_sanitize_mount_name() {
        let cases = [
            ("valid-name_123", "valid-name_123"),
            ("bad name!@#", "bad_name___"),
            ("CamelCase", "CamelCase"),
        ];

        for (input, expected) in cases {
            assert_eq!(sanitize_mount_name(input), expected);
        }
    }
}
287
#[cfg(test)]
mod ref_validation_tests {
    use super::*;

    /// Every supported URL prefix is recognized; bare paths are not.
    #[test]
    fn test_is_git_url() {
        let accepted = [
            "git@github.com:org/repo.git",
            "https://github.com/org/repo",
            "ssh://user@host:22/org/repo",
            "http://gitlab.com/org/repo",
        ];
        for url in accepted {
            assert!(is_git_url(url));
        }

        let rejected = ["org/repo", "/local/path"];
        for value in rejected {
            assert!(!is_git_url(value));
        }
    }

    /// Plain repo-level SSH and HTTPS URLs validate.
    #[test]
    fn test_validate_reference_url_accepts_valid() {
        for url in ["git@github.com:org/repo.git", "https://github.com/org/repo"] {
            assert!(validate_reference_url(url).is_ok());
        }
    }

    /// A `:subpath` suffix makes the URL unusable as a reference.
    #[test]
    fn test_validate_reference_url_rejects_subpath() {
        let with_subpath = "git@github.com:org/repo.git:docs";
        assert!(validate_reference_url(with_subpath).is_err());
    }

    /// SSH and HTTPS spellings of the same repo share one lowercased key.
    #[test]
    fn test_canonical_reference_key_normalizes() {
        let from_ssh = canonical_reference_key("git@github.com:User/Repo.git").unwrap();
        let from_https = canonical_reference_key("https://github.com/user/repo").unwrap();

        assert_eq!(from_ssh, from_https);
        assert_eq!(
            from_ssh,
            ("github.com".into(), "user".into(), "repo".into())
        );
    }

    /// Branch, tag and unpinned instances of one repo all get distinct keys.
    #[test]
    fn test_canonical_reference_instance_key_distinguishes_refs() {
        let repo_url = "https://github.com/user/repo";
        let key_for = |pinned: Option<&str>| {
            canonical_reference_instance_key(repo_url, pinned).unwrap()
        };

        let branch_key = key_for(Some("refs/heads/main"));
        let tag_key = key_for(Some("refs/tags/v1.0.0"));
        let unpinned_key = key_for(None);

        assert_ne!(branch_key, tag_key);
        assert_ne!(branch_key, unpinned_key);
        assert_ne!(tag_key, unpinned_key);
    }

    /// A legacy `refs/remotes/origin/<branch>` pin is keyed like `refs/heads/<branch>`.
    #[test]
    fn test_canonical_reference_instance_key_normalizes_legacy_refs_remotes_to_heads() {
        let repo_url = "https://github.com/org/repo";

        let legacy_key =
            canonical_reference_instance_key(repo_url, Some("refs/remotes/origin/main")).unwrap();
        let heads_key =
            canonical_reference_instance_key(repo_url, Some("refs/heads/main")).unwrap();

        assert_eq!(legacy_key, heads_key);
    }

    /// The encoded-key normalizer maps an encoded legacy key onto the encoded heads key.
    #[test]
    fn test_normalize_encoded_ref_key_for_identity_collapses_legacy_remotes() {
        let legacy_key = encode_ref_key("refs/remotes/origin/main").unwrap();
        let heads_key = encode_ref_key("refs/heads/main").unwrap();

        assert_eq!(
            normalize_encoded_ref_key_for_identity(&legacy_key).as_ref(),
            heads_key
        );
    }

    /// `?` and `#` are rejected anywhere in a reference URL.
    #[test]
    fn test_validate_reference_url_rejects_query_and_fragment() {
        let rejected = [
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ];
        for url in rejected {
            assert!(validate_reference_url(url).is_err());
        }
    }
}
374
#[cfg(test)]
mod mcp_https_validation_tests {
    use super::*;

    /// Assert that the HTTPS-only validator accepts every URL in `urls`.
    fn expect_accepted(urls: &[&str]) {
        for url in urls {
            assert!(validate_reference_url_https_only(url).is_ok());
        }
    }

    /// Assert that the HTTPS-only validator rejects every URL in `urls`.
    fn expect_rejected(urls: &[&str]) {
        for url in urls {
            assert!(validate_reference_url_https_only(url).is_err());
        }
    }

    #[test]
    fn test_https_only_accepts_github_web_and_clone() {
        expect_accepted(&[
            "https://github.com/org/repo",
            "https://github.com/org/repo.git",
        ]);
    }

    #[test]
    fn test_https_only_accepts_generic_dot_git() {
        expect_accepted(&["https://gitlab.com/group/proj.git"]);
    }

    #[test]
    fn test_https_only_rejects_ssh_and_http_and_subpath() {
        expect_rejected(&[
            "git@github.com:org/repo.git",
            "ssh://host/org/repo.git",
            "http://github.com/org/repo.git",
            "https://github.com/org/repo.git:docs",
        ]);
    }

    #[test]
    fn test_is_ssh_url_helper() {
        for ssh in ["git@github.com:org/repo.git", "ssh://user@host/repo.git"] {
            assert!(is_ssh_url(ssh));
        }
        for other in ["https://github.com/org/repo", "http://github.com/org/repo"] {
            assert!(!is_ssh_url(other));
        }
    }

    #[test]
    fn test_is_https_url_helper() {
        // The scheme check is case-insensitive, hence the upper-case variant.
        for https in ["https://github.com/org/repo", "HTTPS://github.com/org/repo"] {
            assert!(is_https_url(https));
        }
        for other in ["http://github.com/org/repo", "git@github.com:org/repo"] {
            assert!(!is_https_url(other));
        }
    }

    #[test]
    fn test_https_only_rejects_non_github_without_dot_git() {
        // Non-GitHub hosts must be given as a clone URL ending in `.git`.
        expect_rejected(&["https://gitlab.com/group/proj"]);
    }

    #[test]
    fn test_https_only_rejects_query_and_fragment() {
        expect_rejected(&[
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ]);
    }
}
426
#[cfg(test)]
mod pinned_ref_name_tests {
    use super::validate_pinned_ref_full_name;

    /// Assert that every candidate in `refs` fails validation.
    fn expect_all_rejected(refs: &[&str]) {
        for candidate in refs {
            assert!(validate_pinned_ref_full_name(candidate).is_err());
        }
    }

    #[test]
    fn accepts_allowed_full_refs() {
        let allowed = [
            "refs/heads/main",
            "refs/tags/v1.0.0",
            "refs/remotes/origin/main",
        ];
        for candidate in allowed {
            assert!(validate_pinned_ref_full_name(candidate).is_ok());
        }
    }

    #[test]
    fn rejects_shorthand_and_other_namespaces() {
        expect_all_rejected(&["main", "v1.0.0", "origin/main", "refs/pull/123/head"]);
    }

    #[test]
    fn rejects_incomplete_prefixes() {
        expect_all_rejected(&[
            "refs/heads/",
            "refs/tags/",
            "refs/remotes/",
            "refs/remotes/origin/",
        ]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        expect_all_rejected(&[
            " refs/heads/main",
            "refs/heads/main ",
            " refs/tags/v1.0.0 ",
        ]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        expect_all_rejected(&[
            "refs/heads/main/",
            "refs/tags/v1.0.0/",
            "refs/remotes/origin/main/",
        ]);
    }
}
468
#[cfg(test)]
mod pinned_ref_name_new_input_tests {
    use super::validate_pinned_ref_full_name_new_input;

    /// Assert that every candidate in `refs` fails new-input validation.
    fn expect_all_rejected(refs: &[&str]) {
        for candidate in refs {
            assert!(validate_pinned_ref_full_name_new_input(candidate).is_err());
        }
    }

    #[test]
    fn accepts_heads_and_tags_only() {
        for candidate in ["refs/heads/main", "refs/tags/v1.0.0"] {
            assert!(validate_pinned_ref_full_name_new_input(candidate).is_ok());
        }
    }

    #[test]
    fn rejects_refs_remotes_and_shorthand() {
        expect_all_rejected(&["refs/remotes/origin/main", "main", "refs/pull/123/head"]);
    }

    #[test]
    fn new_input_rejects_incomplete_prefixes() {
        expect_all_rejected(&["refs/heads/", "refs/tags/"]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        expect_all_rejected(&[
            " refs/heads/main",
            "refs/heads/main ",
            " refs/tags/v1.0.0 ",
        ]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        expect_all_rejected(&["refs/heads/main/", "refs/tags/v1.0.0/"]);
    }
}