Skip to main content

thoughts_tool/config/
validation.rs

1use crate::git::ref_key::encode_ref_key;
2use crate::repo_identity::{RepoIdentity, parse_url_and_subpath};
3use anyhow::{Result, bail};
4use std::borrow::Cow;
5
/// Turn an arbitrary mount name into one that is safe to use as a directory name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept unchanged; every other
/// character (including any non-ASCII character) is replaced by a single `_`.
/// The result therefore has exactly as many characters as the input.
pub fn sanitize_mount_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
15
/// Report whether `s` starts with one of the git URL prefixes we support.
///
/// Surrounding whitespace is ignored. The recognised prefixes are `git@`,
/// `https://`, `http://` and `ssh://`; the comparison is case-sensitive.
pub fn is_git_url(s: &str) -> bool {
    const SUPPORTED_PREFIXES: [&str; 4] = ["git@", "https://", "http://", "ssh://"];

    let candidate = s.trim();
    SUPPORTED_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
}
24
25/// Extract host from SSH/HTTPS URLs
26pub fn get_host_from_url(url: &str) -> Result<String> {
27    let (base, _) = parse_url_and_subpath(url);
28    let id = RepoIdentity::parse(&base).map_err(|e| {
29        anyhow::anyhow!(
30            "Unsupported URL (cannot parse host): {}\nDetails: {}",
31            url,
32            e
33        )
34    })?;
35    Ok(id.host)
36}
37
38/// Validate that a reference URL is well-formed and points to org/repo (repo-level only)
39pub fn validate_reference_url(url: &str) -> Result<()> {
40    let url = url.trim();
41    if url.contains('?') || url.contains('#') {
42        bail!(
43            "Reference URLs cannot contain '?' or '#' alternate ref encodings: {}",
44            url
45        );
46    }
47    let (base, subpath) = parse_url_and_subpath(url);
48    if subpath.is_some() {
49        bail!(
50            "Cannot add URL with subpath as a reference: {}\n\n\
51             References are repo-level only.\n\
52             Try one of:\n\
53               - Add the repository URL without a subpath\n\
54               - Use 'thoughts mount add <local-subdir>' for subdirectory mounts",
55            url
56        );
57    }
58    if !is_git_url(&base) {
59        bail!(
60            "Invalid reference value: {}\n\n\
61             Must be a git URL using one of:\n  - git@host:org/repo(.git)\n  - https://host/org/repo(.git)\n  - ssh://user@host[:port]/org/repo(.git)\n",
62            url
63        );
64    }
65    // Ensure org/repo structure is parseable via RepoIdentity
66    RepoIdentity::parse(&base).map_err(|e| {
67        anyhow::anyhow!(
68            "Invalid repository URL: {}\n\n\
69             Expected a URL with an org and repo (e.g., github.com/org/repo).\n\
70             Details: {}",
71            url,
72            e
73        )
74    })?;
75    Ok(())
76}
77
78/// Canonical key (host, org_path, repo) all lowercased, without .git
79pub fn canonical_reference_key(url: &str) -> Result<(String, String, String)> {
80    let (base, _) = parse_url_and_subpath(url);
81    let key = RepoIdentity::parse(&base)?.canonical_key();
82    Ok((key.host, key.org_path, key.repo))
83}
84
/// Map a legacy remote-tracking ref name onto the equivalent branch ref name.
///
/// `refs/remotes/<remote>/<branch>` becomes `refs/heads/<branch>` (the remote
/// segment is dropped; `<branch>` may itself contain `/`). Any other input,
/// including a `refs/remotes/` name without a non-empty branch part, is
/// returned borrowed and unchanged.
fn normalize_pinned_ref_name_for_identity(ref_name: &str) -> Cow<'_, str> {
    let legacy_branch = ref_name
        .strip_prefix("refs/remotes/")
        .and_then(|tail| tail.split_once('/'))
        .map(|(_remote, branch)| branch)
        .filter(|branch| !branch.is_empty());

    match legacy_branch {
        Some(branch) => Cow::Owned(format!("refs/heads/{branch}")),
        None => Cow::Borrowed(ref_name),
    }
}
96
/// Map an encoded legacy remote-tracking ref key onto the encoded branch ref key.
///
/// This is the counterpart of the ref-name normalization, operating on keys
/// in which `/` appears as `~2f`: a key starting with `r-refs~2fremotes~2f`
/// followed by `<remote>~2f<branch>` is rewritten to
/// `r-refs~2fheads~2f<branch>`. Any other key, including one with no
/// non-empty branch part, is returned borrowed and unchanged.
pub(crate) fn normalize_encoded_ref_key_for_identity(ref_key: &str) -> Cow<'_, str> {
    const REMOTES_PREFIX: &str = "r-refs~2fremotes~2f";
    const HEADS_PREFIX: &str = "r-refs~2fheads~2f";
    const ENCODED_SLASH: &str = "~2f";

    let legacy_branch = ref_key
        .strip_prefix(REMOTES_PREFIX)
        .and_then(|tail| tail.split_once(ENCODED_SLASH))
        .map(|(_remote_enc, branch_enc)| branch_enc)
        .filter(|branch_enc| !branch_enc.is_empty());

    match legacy_branch {
        Some(branch_enc) => Cow::Owned(format!("{HEADS_PREFIX}{branch_enc}")),
        None => Cow::Borrowed(ref_key),
    }
}
110
111pub fn canonical_reference_instance_key(
112    url: &str,
113    ref_name: Option<&str>,
114) -> Result<(String, String, String, Option<String>)> {
115    let (host, org_path, repo) = canonical_reference_key(url)?;
116    let ref_key = ref_name
117        .map(normalize_pinned_ref_name_for_identity)
118        .map(|name| encode_ref_key(name.as_ref()))
119        .transpose()?;
120    Ok((host, org_path, repo, ref_key))
121}
122
123pub fn validate_pinned_ref_full_name(ref_name: &str) -> Result<()> {
124    let trimmed = ref_name.trim();
125    if trimmed.is_empty() {
126        bail!("ref cannot be empty");
127    }
128    if trimmed != ref_name {
129        bail!("Pinned ref must not contain leading/trailing whitespace");
130    }
131    if trimmed.ends_with('/') {
132        bail!("Pinned ref cannot end with '/'");
133    }
134    let ref_name = trimmed;
135
136    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
137        if rest.is_empty() {
138            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
139        }
140        return Ok(());
141    }
142
143    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
144        if rest.is_empty() {
145            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
146        }
147        return Ok(());
148    }
149
150    if let Some(rest) = ref_name.strip_prefix("refs/remotes/") {
151        let mut parts = rest.splitn(2, '/');
152        let remote = parts.next().unwrap_or("");
153        let branch = parts.next().unwrap_or("");
154        if remote.is_empty() || branch.is_empty() {
155            bail!(
156                "Legacy pinned ref must be 'refs/remotes/<remote>/<branch>' (got '{}')",
157                ref_name
158            );
159        }
160        return Ok(());
161    }
162
163    bail!(
164        "Pinned refs must be full ref names starting with 'refs/heads/', 'refs/tags/', or 'refs/remotes/' (got '{}')",
165        ref_name
166    );
167}
168
169pub fn validate_pinned_ref_full_name_new_input(ref_name: &str) -> Result<()> {
170    let trimmed = ref_name.trim();
171    if trimmed.is_empty() {
172        bail!("ref cannot be empty");
173    }
174    if trimmed != ref_name {
175        bail!("Pinned ref must not contain leading/trailing whitespace");
176    }
177    if trimmed.ends_with('/') {
178        bail!("Pinned ref cannot end with '/'");
179    }
180    let ref_name = trimmed;
181
182    if let Some(rest) = ref_name.strip_prefix("refs/heads/") {
183        if rest.is_empty() {
184            bail!("Pinned ref cannot be the bare prefix 'refs/heads/'");
185        }
186        return Ok(());
187    }
188
189    if let Some(rest) = ref_name.strip_prefix("refs/tags/") {
190        if rest.is_empty() {
191            bail!("Pinned ref cannot be the bare prefix 'refs/tags/'");
192        }
193        return Ok(());
194    }
195
196    bail!(
197        "Pinned refs must be full ref names starting with 'refs/heads/' or 'refs/tags/' (got '{}')",
198        ref_name
199    );
200}
201
202// --- MCP HTTPS-only validation helpers ---
203
/// True if the URL uses one of the SSH forms that MCP does not support.
///
/// Surrounding whitespace is ignored. Both the scp-like `git@...` form and
/// explicit `ssh://` URLs count; the comparison is case-sensitive.
pub fn is_ssh_url(s: &str) -> bool {
    let candidate = s.trim();
    ["git@", "ssh://"]
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
}
209
/// True if the URL starts with `https://`.
///
/// Leading whitespace is ignored and the scheme is matched ASCII
/// case-insensitively, so `HTTPS://...` is accepted as well.
pub fn is_https_url(s: &str) -> bool {
    const SCHEME: &str = "https://";

    // Compare only the leading bytes instead of lowercasing (and allocating)
    // the whole URL. `get` yields `None` when the input is too short or the
    // cut would land inside a multi-byte character; neither can be a match.
    s.trim_start()
        .get(..SCHEME.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(SCHEME))
}
214
215/// Validate MCP add_reference input:
216/// - Reject SSH and http://
217/// - Reject subpaths
218/// - Accept GitHub web or clone URLs (https://github.com/org/repo[.git])
219/// - Accept generic https://*.git clone URLs
220pub fn validate_reference_url_https_only(url: &str) -> Result<()> {
221    let url = url.trim();
222
223    if url.contains('?') || url.contains('#') {
224        bail!(
225            "Reference URLs cannot contain '?' or '#' alternate ref encodings: {}",
226            url
227        );
228    }
229
230    // Reject subpaths (URL:subpath)
231    let (base, subpath) = parse_url_and_subpath(url);
232    if subpath.is_some() {
233        bail!(
234            "Cannot add URL with subpath as a reference: {}\n\nReferences are repo-level only.",
235            url
236        );
237    }
238
239    if is_ssh_url(&base) {
240        bail!(
241            "SSH URLs are not supported by the MCP add_reference tool: {}\n\n\
242             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)\n\n\
243             If you must use SSH, run the CLI instead:\n  thoughts references add <git@... or ssh://...>",
244            base
245        );
246    }
247    if !is_https_url(&base) {
248        bail!(
249            "Only HTTPS URLs are supported by the MCP add_reference tool: {}\n\n\
250             Please provide an HTTPS URL, e.g.:\n  https://github.com/org/repo(.git)",
251            base
252        );
253    }
254
255    // Parse as RepoIdentity to validate structure
256    let id = RepoIdentity::parse(&base).map_err(|e| {
257        anyhow::anyhow!(
258            "Invalid repository URL (expected host/org/repo).\nDetails: {}",
259            e
260        )
261    })?;
262
263    // For non-GitHub hosts, require .git suffix
264    if id.host != "github.com" && !base.ends_with(".git") {
265        bail!(
266            "For non-GitHub hosts, please provide an HTTPS clone URL ending with .git:\n  {}",
267            base
268        );
269    }
270
271    Ok(())
272}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Allowed characters pass through untouched; everything else becomes `_`.
    #[test]
    fn test_sanitize_mount_name() {
        let cases = [
            ("valid-name_123", "valid-name_123"),
            ("bad name!@#", "bad_name___"),
            ("CamelCase", "CamelCase"),
        ];

        for (input, expected) in cases {
            assert_eq!(sanitize_mount_name(input), expected, "input: {input:?}");
        }
    }
}
285
#[cfg(test)]
mod ref_validation_tests {
    use super::*;

    /// Repository used by the instance-key tests that vary only the ref.
    const USER_REPO_URL: &str = "https://github.com/user/repo";

    #[test]
    fn test_is_git_url() {
        let accepted = [
            "git@github.com:org/repo.git",
            "https://github.com/org/repo",
            "ssh://user@host:22/org/repo",
            "http://gitlab.com/org/repo",
        ];
        for url in accepted {
            assert!(is_git_url(url), "should be a git URL: {url}");
        }

        let rejected = ["org/repo", "/local/path"];
        for value in rejected {
            assert!(!is_git_url(value), "should not be a git URL: {value}");
        }
    }

    #[test]
    fn test_validate_reference_url_accepts_valid() {
        for url in ["git@github.com:org/repo.git", "https://github.com/org/repo"] {
            assert!(validate_reference_url(url).is_ok(), "should accept: {url}");
        }
    }

    #[test]
    fn test_validate_reference_url_rejects_subpath() {
        let with_subpath = "git@github.com:org/repo.git:docs";
        assert!(validate_reference_url(with_subpath).is_err());
    }

    /// SSH and HTTPS spellings of the same repository share one canonical key.
    #[test]
    fn test_canonical_reference_key_normalizes() {
        let from_ssh = canonical_reference_key("git@github.com:User/Repo.git").unwrap();
        let from_https = canonical_reference_key("https://github.com/user/repo").unwrap();
        assert_eq!(from_ssh, from_https);

        let expected = (
            "github.com".to_string(),
            "user".to_string(),
            "repo".to_string(),
        );
        assert_eq!(from_ssh, expected);
    }

    /// A branch pin, a tag pin and no pin at all are three distinct instances.
    #[test]
    fn test_canonical_reference_instance_key_distinguishes_refs() {
        let key_for =
            |pin: Option<&str>| canonical_reference_instance_key(USER_REPO_URL, pin).unwrap();

        let main = key_for(Some("refs/heads/main"));
        let tag = key_for(Some("refs/tags/v1.0.0"));
        let unpinned = key_for(None);

        assert_ne!(main, tag);
        assert_ne!(main, unpinned);
        assert_ne!(tag, unpinned);
    }

    #[test]
    fn test_canonical_reference_instance_key_normalizes_legacy_refs_remotes_to_heads() {
        let url = "https://github.com/org/repo";

        let legacy =
            canonical_reference_instance_key(url, Some("refs/remotes/origin/main")).unwrap();
        let canonical = canonical_reference_instance_key(url, Some("refs/heads/main")).unwrap();

        assert_eq!(legacy, canonical);
    }

    #[test]
    fn test_normalize_encoded_ref_key_for_identity_collapses_legacy_remotes() {
        let legacy = encode_ref_key("refs/remotes/origin/main").unwrap();
        let canonical = encode_ref_key("refs/heads/main").unwrap();

        let normalized = normalize_encoded_ref_key_for_identity(&legacy);
        assert_eq!(normalized.as_ref(), canonical);
    }

    #[test]
    fn test_validate_reference_url_rejects_query_and_fragment() {
        for url in [
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ] {
            assert!(validate_reference_url(url).is_err(), "should reject: {url}");
        }
    }
}
372
#[cfg(test)]
mod mcp_https_validation_tests {
    use super::*;

    /// Assert that the HTTPS-only validator accepts every URL in `urls`.
    fn assert_all_accepted(urls: &[&str]) {
        for url in urls {
            assert!(
                validate_reference_url_https_only(url).is_ok(),
                "should accept: {url}"
            );
        }
    }

    /// Assert that the HTTPS-only validator rejects every URL in `urls`.
    fn assert_all_rejected(urls: &[&str]) {
        for url in urls {
            assert!(
                validate_reference_url_https_only(url).is_err(),
                "should reject: {url}"
            );
        }
    }

    #[test]
    fn test_https_only_accepts_github_web_and_clone() {
        assert_all_accepted(&[
            "https://github.com/org/repo",
            "https://github.com/org/repo.git",
        ]);
    }

    #[test]
    fn test_https_only_accepts_generic_dot_git() {
        assert_all_accepted(&["https://gitlab.com/group/proj.git"]);
    }

    #[test]
    fn test_https_only_rejects_ssh_and_http_and_subpath() {
        assert_all_rejected(&[
            "git@github.com:org/repo.git",
            "ssh://host/org/repo.git",
            "http://github.com/org/repo.git",
            "https://github.com/org/repo.git:docs",
        ]);
    }

    #[test]
    fn test_is_ssh_url_helper() {
        for url in ["git@github.com:org/repo.git", "ssh://user@host/repo.git"] {
            assert!(is_ssh_url(url), "should be SSH: {url}");
        }
        for url in ["https://github.com/org/repo", "http://github.com/org/repo"] {
            assert!(!is_ssh_url(url), "should not be SSH: {url}");
        }
    }

    #[test]
    fn test_is_https_url_helper() {
        // The second entry checks that the scheme match is case-insensitive.
        for url in ["https://github.com/org/repo", "HTTPS://github.com/org/repo"] {
            assert!(is_https_url(url), "should be HTTPS: {url}");
        }
        for url in ["http://github.com/org/repo", "git@github.com:org/repo"] {
            assert!(!is_https_url(url), "should not be HTTPS: {url}");
        }
    }

    #[test]
    fn test_https_only_rejects_non_github_without_dot_git() {
        // Non-GitHub without .git suffix should be rejected
        assert_all_rejected(&["https://gitlab.com/group/proj"]);
    }

    #[test]
    fn test_https_only_rejects_query_and_fragment() {
        assert_all_rejected(&[
            "https://github.com/org/repo?ref=main",
            "https://github.com/org/repo#main",
        ]);
    }
}
424
#[cfg(test)]
mod pinned_ref_name_tests {
    use super::validate_pinned_ref_full_name;

    /// Assert that every name in `names` is rejected by the validator.
    fn assert_all_rejected(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name(name).is_err(),
                "should reject: {name:?}"
            );
        }
    }

    #[test]
    fn accepts_allowed_full_refs() {
        for name in [
            "refs/heads/main",
            "refs/tags/v1.0.0",
            "refs/remotes/origin/main",
        ] {
            assert!(
                validate_pinned_ref_full_name(name).is_ok(),
                "should accept: {name:?}"
            );
        }
    }

    #[test]
    fn rejects_shorthand_and_other_namespaces() {
        assert_all_rejected(&["main", "v1.0.0", "origin/main", "refs/pull/123/head"]);
    }

    #[test]
    fn rejects_incomplete_prefixes() {
        assert_all_rejected(&[
            "refs/heads/",
            "refs/tags/",
            "refs/remotes/",
            "refs/remotes/origin/",
        ]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        assert_all_rejected(&[" refs/heads/main", "refs/heads/main ", " refs/tags/v1.0.0 "]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        assert_all_rejected(&[
            "refs/heads/main/",
            "refs/tags/v1.0.0/",
            "refs/remotes/origin/main/",
        ]);
    }
}
466
#[cfg(test)]
mod pinned_ref_name_new_input_tests {
    use super::validate_pinned_ref_full_name_new_input;

    /// Assert that every name in `names` is rejected by the new-input validator.
    fn assert_all_rejected(names: &[&str]) {
        for name in names {
            assert!(
                validate_pinned_ref_full_name_new_input(name).is_err(),
                "should reject: {name:?}"
            );
        }
    }

    #[test]
    fn accepts_heads_and_tags_only() {
        for name in ["refs/heads/main", "refs/tags/v1.0.0"] {
            assert!(
                validate_pinned_ref_full_name_new_input(name).is_ok(),
                "should accept: {name:?}"
            );
        }
    }

    #[test]
    fn rejects_refs_remotes_and_shorthand() {
        assert_all_rejected(&["refs/remotes/origin/main", "main", "refs/pull/123/head"]);
    }

    #[test]
    fn new_input_rejects_incomplete_prefixes() {
        assert_all_rejected(&["refs/heads/", "refs/tags/"]);
    }

    #[test]
    fn rejects_leading_and_trailing_whitespace() {
        assert_all_rejected(&[" refs/heads/main", "refs/heads/main ", " refs/tags/v1.0.0 "]);
    }

    #[test]
    fn rejects_trailing_slash_full_refs() {
        assert_all_rejected(&["refs/heads/main/", "refs/tags/v1.0.0/"]);
    }
}