//! sema_core/resolve.rs — package import detection, spec validation, and
//! on-disk resolution of package entrypoints.

1use std::path::{Path, PathBuf};
2
3use crate::error::SemaError;
4use crate::home::sema_home;
5
6/// Returns the packages directory: `sema_home()/packages/`.
7pub fn packages_dir() -> PathBuf {
8    sema_home().join("packages")
9}
10
11/// Determines if an import spec is a package path vs a file path.
12///
13/// Package paths either:
14/// - Contain `/` with a hostname-like first segment (e.g., `github.com/user/repo`)
15/// - Are short names that exist in `~/.sema/packages/` (e.g., `http-helpers`)
16///
17/// Rejects relative paths (`./`, `../`), `.sema` extensions, absolute paths,
18/// URLs with schemes (`://`), backslashes, and colons.
19pub fn is_package_import(spec: &str) -> bool {
20    if spec.starts_with("./")
21        || spec.starts_with("../")
22        || spec.ends_with(".sema")
23        || spec.starts_with('/')
24        || spec.contains("://")
25        || spec.contains('\\')
26        || spec.contains(':')
27    {
28        return false;
29    }
30
31    // Classic git-style: contains / (e.g., github.com/user/repo)
32    if spec.contains('/') {
33        return true;
34    }
35
36    // Registry-style short name: check if it exists in the packages directory
37    packages_dir().join(spec).is_dir()
38}
39
40/// Validate that a package spec contains no path traversal or dangerous segments.
41///
42/// Rejects: `..` segments, empty segments, schemes, backslashes, colons, NUL bytes.
43pub fn validate_package_spec(spec: &str) -> Result<(), SemaError> {
44    if spec.contains("://") {
45        return Err(SemaError::eval(format!(
46            "invalid package spec: URL schemes not allowed: {spec}"
47        ))
48        .with_hint("Use bare host/path format, e.g.: github.com/user/repo"));
49    }
50    if spec.starts_with('/') {
51        return Err(SemaError::eval(format!(
52            "invalid package spec: absolute paths not allowed: {spec}"
53        ))
54        .with_hint("Use bare host/path format, e.g.: github.com/user/repo"));
55    }
56    if spec.contains('\\') {
57        return Err(SemaError::eval(format!(
58            "invalid package spec: backslashes not allowed: {spec}"
59        )));
60    }
61    if spec.contains(':') {
62        return Err(SemaError::eval(format!(
63            "invalid package spec: colons not allowed: {spec}"
64        )));
65    }
66    if spec.contains('\0') {
67        return Err(SemaError::eval(
68            "invalid package spec: NUL byte not allowed".to_string(),
69        ));
70    }
71    for segment in spec.split('/') {
72        if segment.is_empty() || segment == "." || segment == ".." {
73            return Err(SemaError::eval(format!(
74                "invalid package spec: path traversal not allowed: {spec}"
75            )));
76        }
77    }
78    Ok(())
79}
80
81/// A validated package path (e.g., "github.com/user/repo").
82///
83/// Construction via `parse()` ensures the path has no traversal,
84/// schemes, backslashes, colons, or empty segments.
85#[derive(Debug, Clone, PartialEq, Eq, Hash)]
86pub struct PackagePath(String);
87
88impl PackagePath {
89    pub fn parse(s: &str) -> Result<Self, SemaError> {
90        validate_package_spec(s)?;
91        Ok(Self(s.to_string()))
92    }
93
94    pub fn as_str(&self) -> &str {
95        &self.0
96    }
97}
98
99impl std::fmt::Display for PackagePath {
100    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101        f.write_str(&self.0)
102    }
103}
104
105/// A parsed package spec: validated path + git ref (e.g., "github.com/user/repo@v1.0").
106///
107/// The git ref defaults to "main" when no `@ref` suffix is present.
108#[derive(Debug, Clone, PartialEq, Eq)]
109pub struct PackageSpec {
110    pub path: PackagePath,
111    pub git_ref: String,
112}
113
114impl PackageSpec {
115    pub fn parse(spec: &str) -> Result<Self, SemaError> {
116        let (path_str, git_ref) = if let Some((p, r)) = spec.rsplit_once('@') {
117            (p, r)
118        } else {
119            (spec, "main")
120        };
121
122        let path = PackagePath::parse(path_str)?;
123
124        if git_ref.is_empty() {
125            return Err(
126                SemaError::eval(format!("invalid package spec: empty git ref: {spec}"))
127                    .with_hint("Provide a ref after @, e.g.: github.com/user/repo@v1.0"),
128            );
129        }
130        if git_ref.contains('\0') {
131            return Err(SemaError::eval(
132                "invalid package spec: NUL byte in git ref".to_string(),
133            ));
134        }
135        // BIN-2: a ref starting with '-' would be parsed by `git checkout` as a
136        // flag (e.g. `-f`). Reject it here; `git checkout` has no safe `--`
137        // separator for refs (that turns the ref into a pathspec).
138        if git_ref.starts_with('-') {
139            return Err(SemaError::eval(format!(
140                "invalid package spec: git ref cannot start with '-': {git_ref}"
141            )));
142        }
143
144        Ok(Self {
145            path,
146            git_ref: git_ref.to_string(),
147        })
148    }
149
150    pub fn clone_url(&self) -> String {
151        format!("https://{}.git", self.path.as_str())
152    }
153
154    pub fn dest_dir(&self, packages_dir: &Path) -> PathBuf {
155        packages_dir.join(self.path.as_str())
156    }
157}
158
159/// Resolves a package spec to a filesystem path.
160///
161/// Resolution order:
162/// 1. `~/.sema/packages/<spec>.sema` (sub-module import)
163/// 2. `~/.sema/packages/<spec>/sema.toml` → custom entrypoint
164/// 3. `~/.sema/packages/<spec>/package.sema` (default entrypoint)
165pub fn resolve_package_import(spec: &str) -> Result<PathBuf, SemaError> {
166    resolve_package_import_in(spec, &packages_dir())
167}
168
169/// Resolves a package spec against a given packages directory.
170pub fn resolve_package_import_in(spec: &str, base: &Path) -> Result<PathBuf, SemaError> {
171    validate_package_spec(spec)?;
172
173    // 1. Direct file: <packages>/<spec>.sema
174    let direct = base.join(format!("{spec}.sema"));
175    if direct.is_file() {
176        verify_path_within(base, &direct)?;
177        return Ok(direct);
178    }
179
180    let pkg_dir = base.join(spec);
181
182    // 2. sema.toml with custom entrypoint
183    let toml_path = pkg_dir.join("sema.toml");
184    if toml_path.is_file() {
185        if let Some(entrypoint) = parse_entrypoint(&toml_path) {
186            // Validate the entrypoint itself doesn't escape the package dir
187            if entrypoint.contains("..") || entrypoint.starts_with('/') {
188                return Err(SemaError::eval(format!(
189                    "invalid entrypoint in {}: {entrypoint}",
190                    toml_path.display()
191                )));
192            }
193            let entry = pkg_dir.join(&entrypoint);
194            if entry.is_file() {
195                verify_path_within(base, &entry)?;
196                return Ok(entry);
197            }
198        }
199    }
200
201    // 3. Default entrypoint: package.sema
202    let mod_file = pkg_dir.join("package.sema");
203    if mod_file.is_file() {
204        verify_path_within(base, &mod_file)?;
205        return Ok(mod_file);
206    }
207
208    Err(SemaError::eval(format!("package not found: {spec}"))
209        .with_hint(format!("Run: sema pkg add {spec}")))
210}
211
212/// Verify that a resolved path stays within the expected base directory.
213fn verify_path_within(base: &Path, resolved: &Path) -> Result<(), SemaError> {
214    // Use canonicalize if both paths exist, otherwise check lexically
215    if let (Ok(canon_base), Ok(canon_resolved)) = (base.canonicalize(), resolved.canonicalize()) {
216        if !canon_resolved.starts_with(&canon_base) {
217            return Err(SemaError::eval(
218                "package path escapes packages directory".to_string(),
219            ));
220        }
221    }
222    Ok(())
223}
224
225/// Parse `entrypoint = "..."` from a sema.toml file.
226///
227/// Checks `[package].entrypoint` first, then falls back to a top-level `entrypoint` key.
228/// Ignores `entrypoint` keys in any other table (e.g. `[tool]`).
229fn parse_entrypoint(path: &Path) -> Option<String> {
230    let contents = std::fs::read_to_string(path).ok()?;
231    let doc: toml::Value = toml::from_str(&contents).ok()?;
232
233    // Check [package].entrypoint first
234    if let Some(ep) = doc
235        .get("package")
236        .and_then(|p| p.get("entrypoint"))
237        .and_then(|v| v.as_str())
238    {
239        return Some(ep.to_string());
240    }
241
242    // Fall back to top-level entrypoint
243    if let Some(ep) = doc.get("entrypoint").and_then(|v| v.as_str()) {
244        return Some(ep.to_string());
245    }
246
247    None
248}
249
/// Unit tests for import classification, spec validation, on-disk
/// resolution, and manifest parsing.
///
/// Filesystem tests work in a per-test directory under the system temp dir
/// and remove it on the success path.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    use std::sync::atomic::{AtomicU64, Ordering};

    // Bumped for every temp path so names never collide within one process.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a unique temp packages directory for testing.
    ///
    /// The name combines the process id with `TEST_COUNTER`; any leftover
    /// directory of the same name is removed first.
    fn temp_packages_dir() -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
        let dir =
            std::env::temp_dir().join(format!("sema-resolve-test-{}-{}", std::process::id(), id));
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    // --- is_package_import tests ---

    #[test]
    fn test_is_package_import_valid() {
        assert!(is_package_import("github.com/user/repo"));
        assert!(is_package_import("github.com/user/repo/sub"));
        assert!(is_package_import("gitlab.com/org/project"));
    }

    #[test]
    fn test_is_package_import_file_paths() {
        assert!(!is_package_import("./utils.sema"));
        assert!(!is_package_import("../lib/utils.sema"));
        assert!(!is_package_import("utils.sema"));
        assert!(!is_package_import("/absolute/path.sema"));
        // NOTE(review): assumes no `single-word` directory exists in the real
        // packages directory of the machine running the tests.
        assert!(!is_package_import("single-word"));
        assert!(!is_package_import("github.com/user/repo.sema"));
    }

    #[test]
    fn test_is_package_import_rejects_schemes() {
        assert!(!is_package_import("https://github.com/user/repo"));
        assert!(!is_package_import("http://example.com/pkg"));
        assert!(!is_package_import("ssh://git@github.com/user/repo"));
    }

    #[test]
    fn test_is_package_import_rejects_dangerous() {
        assert!(!is_package_import("github.com\\user\\repo")); // backslash
        assert!(!is_package_import("git@github.com:user/repo")); // colon (scp-style)
        assert!(!is_package_import("C:/Users/path")); // Windows drive
    }

    // --- validate_package_spec tests ---

    #[test]
    fn test_validate_spec_valid() {
        assert!(validate_package_spec("github.com/user/repo").is_ok());
        assert!(validate_package_spec("gitlab.com/org/project/sub").is_ok());
    }

    #[test]
    fn test_validate_spec_traversal() {
        assert!(validate_package_spec("github.com/../../etc/passwd").is_err());
        assert!(validate_package_spec("github.com/user/../../../etc").is_err());
        assert!(validate_package_spec("../escape").is_err());
        assert!(validate_package_spec("github.com/./user/repo").is_err());
    }

    #[test]
    fn test_validate_spec_empty_segments() {
        assert!(validate_package_spec("github.com//user/repo").is_err());
        assert!(validate_package_spec("/github.com/user").is_err());
    }

    #[test]
    fn test_validate_spec_schemes() {
        assert!(validate_package_spec("https://github.com/user/repo").is_err());
        assert!(validate_package_spec("ssh://git@host/repo").is_err());
    }

    #[test]
    fn test_validate_spec_dangerous_chars() {
        assert!(validate_package_spec("github.com\\user").is_err());
        assert!(validate_package_spec("git@github.com:user/repo").is_err());
    }

    #[test]
    fn test_validate_spec_nul_byte() {
        assert!(validate_package_spec("github.com/user\0/repo").is_err());
    }

    // --- resolve_package_import_in tests ---

    #[test]
    fn test_resolve_direct_file() {
        let base = temp_packages_dir();
        let pkg_path = base.join("github.com/user");
        fs::create_dir_all(&pkg_path).unwrap();
        fs::write(pkg_path.join("repo.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_path.join("repo.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_custom_entrypoint() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = \"lib.sema\"\n").unwrap();
        fs::write(pkg_dir.join("lib.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("lib.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_custom_entrypoint_single_quotes() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = 'main.sema'\n").unwrap();
        fs::write(pkg_dir.join("main.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("main.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_entrypoint_with_inline_comment() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"lib.sema\" # the main entry\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("lib.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("lib.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_entrypoint_traversal_rejected() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"../../etc/passwd\"\n",
        )
        .unwrap();

        let err = resolve_package_import_in("github.com/user/repo", &base).unwrap_err();
        assert!(err.to_string().contains("invalid entrypoint"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_not_found() {
        let base = temp_packages_dir();
        let err = resolve_package_import_in("github.com/user/repo", &base).unwrap_err();
        assert!(err.to_string().contains("package not found"));
        assert_eq!(err.hint(), Some("Run: sema pkg add github.com/user/repo"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_traversal_rejected() {
        let base = temp_packages_dir();
        let err = resolve_package_import_in("github.com/../../etc/passwd", &base).unwrap_err();
        assert!(err.to_string().contains("path traversal"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_priority_direct_over_mod() {
        let base = temp_packages_dir();
        let parent = base.join("github.com/user");
        fs::create_dir_all(&parent).unwrap();
        fs::write(parent.join("repo.sema"), "direct").unwrap();

        let pkg_dir = parent.join("repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("package.sema"), "pkg").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, parent.join("repo.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_entrypoint_fallback_to_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        // sema.toml exists but entrypoint file doesn't
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"nonexistent.sema\"\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    #[test]
    fn test_resolve_sema_toml_without_entrypoint_uses_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        // sema.toml exists but has no entrypoint key
        fs::write(
            pkg_dir.join("sema.toml"),
            "[package]\nname = \"repo\"\nversion = \"1.0\"\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));

        let _ = fs::remove_dir_all(&base);
    }

    // --- verify_path_within tests (symlink escape) ---

    #[cfg(unix)]
    #[test]
    fn test_resolve_package_sema_symlink_escape_rejected() {
        let base = temp_packages_dir();
        // Create a target file outside the packages directory
        let outside = base.parent().unwrap().join(format!(
            "sema-escape-target-{}",
            TEST_COUNTER.fetch_add(1, Ordering::SeqCst)
        ));
        fs::create_dir_all(&outside).unwrap();
        fs::write(outside.join("package.sema"), "pwned").unwrap();

        // Create a symlink inside packages that points outside
        let pkg_dir = base.join("github.com/user/evil");
        fs::create_dir_all(pkg_dir.parent().unwrap()).unwrap();
        std::os::unix::fs::symlink(&outside, &pkg_dir).unwrap();

        let err = resolve_package_import_in("github.com/user/evil", &base).unwrap_err();
        assert!(
            err.to_string().contains("escapes"),
            "expected escape error, got: {err}"
        );

        let _ = fs::remove_dir_all(&base);
        let _ = fs::remove_dir_all(&outside);
    }

    #[cfg(unix)]
    #[test]
    fn test_resolve_entrypoint_symlink_escape_rejected() {
        let base = temp_packages_dir();
        // Create a target file outside the packages directory
        let outside_file = base.parent().unwrap().join(format!(
            "sema-escape-entry-{}.sema",
            TEST_COUNTER.fetch_add(1, Ordering::SeqCst)
        ));
        fs::write(&outside_file, "pwned").unwrap();

        // Create a package with a sema.toml pointing to a symlinked file
        let pkg_dir = base.join("github.com/user/tricky");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = \"entry.sema\"\n").unwrap();
        std::os::unix::fs::symlink(&outside_file, pkg_dir.join("entry.sema")).unwrap();

        let err = resolve_package_import_in("github.com/user/tricky", &base).unwrap_err();
        assert!(
            err.to_string().contains("escapes"),
            "expected escape error, got: {err}"
        );

        let _ = fs::remove_dir_all(&base);
        let _ = fs::remove_file(&outside_file);
    }

    // --- PackagePath tests ---

    #[test]
    fn test_package_path_valid() {
        let p = PackagePath::parse("github.com/user/repo").unwrap();
        assert_eq!(p.as_str(), "github.com/user/repo");
    }

    #[test]
    fn test_package_path_rejects_traversal() {
        assert!(PackagePath::parse("github.com/../../etc/passwd").is_err());
    }

    #[test]
    fn test_package_path_display() {
        let p = PackagePath::parse("github.com/user/repo").unwrap();
        assert_eq!(format!("{p}"), "github.com/user/repo");
    }

    // --- PackageSpec tests ---

    #[test]
    fn test_package_spec_with_ref() {
        let s = PackageSpec::parse("github.com/user/repo@v1.0").unwrap();
        assert_eq!(s.path.as_str(), "github.com/user/repo");
        assert_eq!(s.git_ref, "v1.0");
    }

    #[test]
    fn test_package_spec_no_ref_defaults_main() {
        let s = PackageSpec::parse("github.com/user/repo").unwrap();
        assert_eq!(s.git_ref, "main");
    }

    #[test]
    fn test_package_spec_ref_with_slash() {
        // Only the last '@' splits path from ref, so a ref may contain '/'.
        let s = PackageSpec::parse("github.com/user/repo@feature/x").unwrap();
        assert_eq!(s.path.as_str(), "github.com/user/repo");
        assert_eq!(s.git_ref, "feature/x");
    }

    #[test]
    fn test_package_spec_clone_url() {
        let s = PackageSpec::parse("github.com/user/repo@v1.0").unwrap();
        assert_eq!(s.clone_url(), "https://github.com/user/repo.git");
    }

    #[test]
    fn test_package_spec_dest_dir() {
        let s = PackageSpec::parse("github.com/user/repo").unwrap();
        let base = PathBuf::from("/home/user/.sema/packages");
        assert_eq!(
            s.dest_dir(&base),
            PathBuf::from("/home/user/.sema/packages/github.com/user/repo")
        );
    }

    #[test]
    fn test_package_spec_rejects_empty_ref() {
        assert!(PackageSpec::parse("github.com/user/repo@").is_err());
    }

    #[test]
    fn test_package_spec_rejects_dash_ref() {
        // BIN-2: a ref that looks like a command-line flag must be refused.
        assert!(PackageSpec::parse("github.com/user/repo@-f").is_err());
        assert!(PackageSpec::parse("github.com/user/repo@--upload-pack=x").is_err());
    }

    #[test]
    fn test_package_spec_rejects_nul_in_ref() {
        assert!(PackageSpec::parse("github.com/user/repo@v1\0").is_err());
    }

    #[test]
    fn test_package_spec_rejects_traversal_in_path() {
        assert!(PackageSpec::parse("github.com/../../etc/passwd@main").is_err());
    }

    // --- parse_entrypoint tests ---

    #[test]
    fn parse_entrypoint_ignores_non_package_table() {
        let dir = temp_packages_dir();
        let toml_content = "[tool]\nentrypoint = \"tool.sema\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(
            result, None,
            "should not pick up entrypoint from [tool] table"
        );
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn parse_entrypoint_reads_from_package_table() {
        let dir = temp_packages_dir();
        let toml_content = "[package]\nentrypoint = \"lib.sema\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(result, Some("lib.sema".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn parse_entrypoint_reads_top_level() {
        let dir = temp_packages_dir();
        let toml_content = "entrypoint = \"main.sema\"\n[deps]\nfoo = \"1.0\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(result, Some("main.sema".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn parse_entrypoint_package_table_takes_precedence() {
        let dir = temp_packages_dir();
        let toml_content = "entrypoint = \"top.sema\"\n[package]\nentrypoint = \"pkg.sema\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(result, Some("pkg.sema".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }
}