// sema_core/resolve.rs

1use std::path::{Path, PathBuf};
2
3use crate::error::SemaError;
4use crate::home::sema_home;
5
6/// Returns the packages directory: `sema_home()/packages/`.
7pub fn packages_dir() -> PathBuf {
8    sema_home().join("packages")
9}
10
11/// Determines if an import spec is a package path vs a file path.
12///
13/// Package paths either:
14/// - Contain `/` with a hostname-like first segment (e.g., `github.com/user/repo`)
15/// - Are short names that exist in `~/.sema/packages/` (e.g., `http-helpers`)
16///
17/// Rejects relative paths (`./`, `../`), `.sema` extensions, absolute paths,
18/// URLs with schemes (`://`), backslashes, and colons.
19pub fn is_package_import(spec: &str) -> bool {
20    if spec.starts_with("./")
21        || spec.starts_with("../")
22        || spec.ends_with(".sema")
23        || spec.starts_with('/')
24        || spec.contains("://")
25        || spec.contains('\\')
26        || spec.contains(':')
27    {
28        return false;
29    }
30
31    // Classic git-style: contains / (e.g., github.com/user/repo)
32    if spec.contains('/') {
33        return true;
34    }
35
36    // Registry-style short name: check if it exists in the packages directory
37    packages_dir().join(spec).is_dir()
38}
39
40/// Validate that a package spec contains no path traversal or dangerous segments.
41///
42/// Rejects: `..` segments, empty segments, schemes, backslashes, colons, NUL bytes.
43pub fn validate_package_spec(spec: &str) -> Result<(), SemaError> {
44    if spec.contains("://") {
45        return Err(SemaError::eval(format!(
46            "invalid package spec: URL schemes not allowed: {spec}"
47        ))
48        .with_hint("Use bare host/path format, e.g.: github.com/user/repo"));
49    }
50    if spec.starts_with('/') {
51        return Err(SemaError::eval(format!(
52            "invalid package spec: absolute paths not allowed: {spec}"
53        ))
54        .with_hint("Use bare host/path format, e.g.: github.com/user/repo"));
55    }
56    if spec.contains('\\') {
57        return Err(SemaError::eval(format!(
58            "invalid package spec: backslashes not allowed: {spec}"
59        )));
60    }
61    if spec.contains(':') {
62        return Err(SemaError::eval(format!(
63            "invalid package spec: colons not allowed: {spec}"
64        )));
65    }
66    if spec.contains('\0') {
67        return Err(SemaError::eval(
68            "invalid package spec: NUL byte not allowed".to_string(),
69        ));
70    }
71    for segment in spec.split('/') {
72        if segment.is_empty() || segment == "." || segment == ".." {
73            return Err(SemaError::eval(format!(
74                "invalid package spec: path traversal not allowed: {spec}"
75            )));
76        }
77    }
78    Ok(())
79}
80
81/// A validated package path (e.g., "github.com/user/repo").
82///
83/// Construction via `parse()` ensures the path has no traversal,
84/// schemes, backslashes, colons, or empty segments.
85#[derive(Debug, Clone, PartialEq, Eq, Hash)]
86pub struct PackagePath(String);
87
88impl PackagePath {
89    pub fn parse(s: &str) -> Result<Self, SemaError> {
90        validate_package_spec(s)?;
91        Ok(Self(s.to_string()))
92    }
93
94    pub fn as_str(&self) -> &str {
95        &self.0
96    }
97}
98
99impl std::fmt::Display for PackagePath {
100    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101        f.write_str(&self.0)
102    }
103}
104
105/// A parsed package spec: validated path + git ref (e.g., "github.com/user/repo@v1.0").
106///
107/// The git ref defaults to "main" when no `@ref` suffix is present.
108#[derive(Debug, Clone, PartialEq, Eq)]
109pub struct PackageSpec {
110    pub path: PackagePath,
111    pub git_ref: String,
112}
113
114impl PackageSpec {
115    pub fn parse(spec: &str) -> Result<Self, SemaError> {
116        let (path_str, git_ref) = if let Some((p, r)) = spec.rsplit_once('@') {
117            (p, r)
118        } else {
119            (spec, "main")
120        };
121
122        let path = PackagePath::parse(path_str)?;
123
124        if git_ref.is_empty() {
125            return Err(
126                SemaError::eval(format!("invalid package spec: empty git ref: {spec}"))
127                    .with_hint("Provide a ref after @, e.g.: github.com/user/repo@v1.0"),
128            );
129        }
130        if git_ref.contains('\0') {
131            return Err(SemaError::eval(
132                "invalid package spec: NUL byte in git ref".to_string(),
133            ));
134        }
135
136        Ok(Self {
137            path,
138            git_ref: git_ref.to_string(),
139        })
140    }
141
142    pub fn clone_url(&self) -> String {
143        format!("https://{}.git", self.path.as_str())
144    }
145
146    pub fn dest_dir(&self, packages_dir: &Path) -> PathBuf {
147        packages_dir.join(self.path.as_str())
148    }
149}
150
151/// Resolves a package spec to a filesystem path.
152///
153/// Resolution order:
154/// 1. `~/.sema/packages/<spec>.sema` (sub-module import)
155/// 2. `~/.sema/packages/<spec>/sema.toml` → custom entrypoint
156/// 3. `~/.sema/packages/<spec>/package.sema` (default entrypoint)
157pub fn resolve_package_import(spec: &str) -> Result<PathBuf, SemaError> {
158    resolve_package_import_in(spec, &packages_dir())
159}
160
161/// Resolves a package spec against a given packages directory.
162pub fn resolve_package_import_in(spec: &str, base: &Path) -> Result<PathBuf, SemaError> {
163    validate_package_spec(spec)?;
164
165    // 1. Direct file: <packages>/<spec>.sema
166    let direct = base.join(format!("{spec}.sema"));
167    if direct.is_file() {
168        verify_path_within(base, &direct)?;
169        return Ok(direct);
170    }
171
172    let pkg_dir = base.join(spec);
173
174    // 2. sema.toml with custom entrypoint
175    let toml_path = pkg_dir.join("sema.toml");
176    if toml_path.is_file() {
177        if let Some(entrypoint) = parse_entrypoint(&toml_path) {
178            // Validate the entrypoint itself doesn't escape the package dir
179            if entrypoint.contains("..") || entrypoint.starts_with('/') {
180                return Err(SemaError::eval(format!(
181                    "invalid entrypoint in {}: {entrypoint}",
182                    toml_path.display()
183                )));
184            }
185            let entry = pkg_dir.join(&entrypoint);
186            if entry.is_file() {
187                verify_path_within(base, &entry)?;
188                return Ok(entry);
189            }
190        }
191    }
192
193    // 3. Default entrypoint: package.sema
194    let mod_file = pkg_dir.join("package.sema");
195    if mod_file.is_file() {
196        verify_path_within(base, &mod_file)?;
197        return Ok(mod_file);
198    }
199
200    Err(SemaError::eval(format!("package not found: {spec}"))
201        .with_hint(format!("Run: sema pkg add {spec}")))
202}
203
204/// Verify that a resolved path stays within the expected base directory.
205fn verify_path_within(base: &Path, resolved: &Path) -> Result<(), SemaError> {
206    // Use canonicalize if both paths exist, otherwise check lexically
207    if let (Ok(canon_base), Ok(canon_resolved)) = (base.canonicalize(), resolved.canonicalize()) {
208        if !canon_resolved.starts_with(&canon_base) {
209            return Err(SemaError::eval(
210                "package path escapes packages directory".to_string(),
211            ));
212        }
213    }
214    Ok(())
215}
216
217/// Parse `entrypoint = "..."` from a sema.toml file.
218///
219/// Checks `[package].entrypoint` first, then falls back to a top-level `entrypoint` key.
220/// Ignores `entrypoint` keys in any other table (e.g. `[tool]`).
221fn parse_entrypoint(path: &Path) -> Option<String> {
222    let contents = std::fs::read_to_string(path).ok()?;
223    let doc: toml::Value = toml::from_str(&contents).ok()?;
224
225    // Check [package].entrypoint first
226    if let Some(ep) = doc
227        .get("package")
228        .and_then(|p| p.get("entrypoint"))
229        .and_then(|v| v.as_str())
230    {
231        return Some(ep.to_string());
232    }
233
234    // Fall back to top-level entrypoint
235    if let Some(ep) = doc.get("entrypoint").and_then(|v| v.as_str()) {
236        return Some(ep.to_string());
237    }
238
239    None
240}
241
// Unit tests for import classification, spec validation, package resolution
// (including symlink-escape protection), and sema.toml entrypoint parsing.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    use std::sync::atomic::{AtomicU64, Ordering};

    // Monotonic counter that keeps temp paths unique within one test process.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a unique temp packages directory for testing.
    ///
    /// The name combines the process id with `TEST_COUNTER`. Any stale
    /// directory of the same name is removed first. Cleanup afterwards is up
    /// to the caller.
    fn temp_packages_dir() -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
        let dir =
            std::env::temp_dir().join(format!("sema-resolve-test-{}-{}", std::process::id(), id));
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    // --- is_package_import tests ---

    // Slash-containing specs are classified as packages without touching disk.
    #[test]
    fn test_is_package_import_valid() {
        assert!(is_package_import("github.com/user/repo"));
        assert!(is_package_import("github.com/user/repo/sub"));
        assert!(is_package_import("gitlab.com/org/project"));
    }

    // Relative, absolute, and `.sema`-suffixed specs are file paths.
    #[test]
    fn test_is_package_import_file_paths() {
        assert!(!is_package_import("./utils.sema"));
        assert!(!is_package_import("../lib/utils.sema"));
        assert!(!is_package_import("utils.sema"));
        assert!(!is_package_import("/absolute/path.sema"));
        // NOTE(review): short names are looked up in the real packages
        // directory, so this assumes no package named `single-word` is
        // installed on the machine running the tests.
        assert!(!is_package_import("single-word"));
        assert!(!is_package_import("github.com/user/repo.sema"));
    }

    #[test]
    fn test_is_package_import_rejects_schemes() {
        assert!(!is_package_import("https://github.com/user/repo"));
        assert!(!is_package_import("http://example.com/pkg"));
        assert!(!is_package_import("ssh://git@github.com/user/repo"));
    }

    #[test]
    fn test_is_package_import_rejects_dangerous() {
        assert!(!is_package_import("github.com\\user\\repo")); // backslash
        assert!(!is_package_import("git@github.com:user/repo")); // colon (scp-style)
        assert!(!is_package_import("C:/Users/path")); // Windows drive
    }

    // --- validate_package_spec tests ---

    #[test]
    fn test_validate_spec_valid() {
        assert!(validate_package_spec("github.com/user/repo").is_ok());
        assert!(validate_package_spec("gitlab.com/org/project/sub").is_ok());
    }

    // Both `..` and `.` segments are rejected.
    #[test]
    fn test_validate_spec_traversal() {
        assert!(validate_package_spec("github.com/../../etc/passwd").is_err());
        assert!(validate_package_spec("github.com/user/../../../etc").is_err());
        assert!(validate_package_spec("../escape").is_err());
        assert!(validate_package_spec("github.com/./user/repo").is_err());
    }

    #[test]
    fn test_validate_spec_empty_segments() {
        assert!(validate_package_spec("github.com//user/repo").is_err());
        assert!(validate_package_spec("/github.com/user").is_err());
    }

    #[test]
    fn test_validate_spec_schemes() {
        assert!(validate_package_spec("https://github.com/user/repo").is_err());
        assert!(validate_package_spec("ssh://git@host/repo").is_err());
    }

    #[test]
    fn test_validate_spec_dangerous_chars() {
        assert!(validate_package_spec("github.com\\user").is_err());
        assert!(validate_package_spec("git@github.com:user/repo").is_err());
    }

    // --- resolve_package_import_in tests ---

    // Step 1 of resolution: `<base>/<spec>.sema` exists as a plain file.
    #[test]
    fn test_resolve_direct_file() {
        let base = temp_packages_dir();
        let pkg_path = base.join("github.com/user");
        fs::create_dir_all(&pkg_path).unwrap();
        fs::write(pkg_path.join("repo.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_path.join("repo.sema"));
    }

    // Step 3 of resolution: default `package.sema` inside the package dir.
    #[test]
    fn test_resolve_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));
    }

    // Step 2 of resolution: top-level `entrypoint` key in sema.toml.
    #[test]
    fn test_resolve_custom_entrypoint() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = \"lib.sema\"\n").unwrap();
        fs::write(pkg_dir.join("lib.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("lib.sema"));
    }

    // TOML literal (single-quoted) strings are accepted for the entrypoint.
    #[test]
    fn test_resolve_custom_entrypoint_single_quotes() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = 'main.sema'\n").unwrap();
        fs::write(pkg_dir.join("main.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("main.sema"));
    }

    // A trailing `# comment` on the entrypoint line must not leak into the value.
    #[test]
    fn test_resolve_entrypoint_with_inline_comment() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"lib.sema\" # the main entry\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("lib.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("lib.sema"));
    }

    // An entrypoint containing `..` is an error, not a silent fallback.
    #[test]
    fn test_resolve_entrypoint_traversal_rejected() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"../../etc/passwd\"\n",
        )
        .unwrap();

        let err = resolve_package_import_in("github.com/user/repo", &base).unwrap_err();
        assert!(err.to_string().contains("invalid entrypoint"));
    }

    // Nothing on disk: the error names the package and hints at `sema pkg add`.
    #[test]
    fn test_resolve_not_found() {
        let base = temp_packages_dir();
        let err = resolve_package_import_in("github.com/user/repo", &base).unwrap_err();
        assert!(err.to_string().contains("package not found"));
        assert_eq!(err.hint(), Some("Run: sema pkg add github.com/user/repo"));
    }

    // Spec validation runs before any filesystem access.
    #[test]
    fn test_resolve_traversal_rejected() {
        let base = temp_packages_dir();
        let err = resolve_package_import_in("github.com/../../etc/passwd", &base).unwrap_err();
        assert!(err.to_string().contains("path traversal"));
    }

    // When both `<spec>.sema` and `<spec>/package.sema` exist, the direct file wins.
    #[test]
    fn test_resolve_priority_direct_over_mod() {
        let base = temp_packages_dir();
        let parent = base.join("github.com/user");
        fs::create_dir_all(&parent).unwrap();
        fs::write(parent.join("repo.sema"), "direct").unwrap();

        let pkg_dir = parent.join("repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("package.sema"), "pkg").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, parent.join("repo.sema"));
    }

    // A declared entrypoint whose file is missing falls back to package.sema.
    #[test]
    fn test_resolve_entrypoint_fallback_to_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        // sema.toml exists but entrypoint file doesn't
        fs::write(
            pkg_dir.join("sema.toml"),
            "entrypoint = \"nonexistent.sema\"\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));
    }

    // A manifest with no entrypoint key also falls back to package.sema.
    #[test]
    fn test_resolve_sema_toml_without_entrypoint_uses_package_sema() {
        let base = temp_packages_dir();
        let pkg_dir = base.join("github.com/user/repo");
        fs::create_dir_all(&pkg_dir).unwrap();
        // sema.toml exists but has no entrypoint key
        fs::write(
            pkg_dir.join("sema.toml"),
            "[package]\nname = \"repo\"\nversion = \"1.0\"\n",
        )
        .unwrap();
        fs::write(pkg_dir.join("package.sema"), "(define x 1)").unwrap();

        let result = resolve_package_import_in("github.com/user/repo", &base).unwrap();
        assert_eq!(result, pkg_dir.join("package.sema"));
    }

    // --- verify_path_within tests (symlink escape) ---

    // The package directory itself is a symlink pointing outside `base`.
    #[cfg(unix)]
    #[test]
    fn test_resolve_package_sema_symlink_escape_rejected() {
        let base = temp_packages_dir();
        // Create a target file outside the packages directory
        let outside = base.parent().unwrap().join(format!(
            "sema-escape-target-{}",
            TEST_COUNTER.fetch_add(1, Ordering::SeqCst)
        ));
        fs::create_dir_all(&outside).unwrap();
        fs::write(outside.join("package.sema"), "pwned").unwrap();

        // Create a symlink inside packages that points outside
        let pkg_dir = base.join("github.com/user/evil");
        fs::create_dir_all(pkg_dir.parent().unwrap()).unwrap();
        std::os::unix::fs::symlink(&outside, &pkg_dir).unwrap();

        let err = resolve_package_import_in("github.com/user/evil", &base).unwrap_err();
        assert!(
            err.to_string().contains("escapes"),
            "expected escape error, got: {err}"
        );

        let _ = fs::remove_dir_all(&outside);
    }

    // The entrypoint file is a symlink pointing outside `base`.
    #[cfg(unix)]
    #[test]
    fn test_resolve_entrypoint_symlink_escape_rejected() {
        let base = temp_packages_dir();
        // Create a target file outside the packages directory
        let outside_file = base.parent().unwrap().join(format!(
            "sema-escape-entry-{}.sema",
            TEST_COUNTER.fetch_add(1, Ordering::SeqCst)
        ));
        fs::write(&outside_file, "pwned").unwrap();

        // Create a package with a sema.toml pointing to a symlinked file
        let pkg_dir = base.join("github.com/user/tricky");
        fs::create_dir_all(&pkg_dir).unwrap();
        fs::write(pkg_dir.join("sema.toml"), "entrypoint = \"entry.sema\"\n").unwrap();
        std::os::unix::fs::symlink(&outside_file, pkg_dir.join("entry.sema")).unwrap();

        let err = resolve_package_import_in("github.com/user/tricky", &base).unwrap_err();
        assert!(
            err.to_string().contains("escapes"),
            "expected escape error, got: {err}"
        );

        let _ = fs::remove_file(&outside_file);
    }

    // --- PackagePath tests ---

    #[test]
    fn test_package_path_valid() {
        let p = PackagePath::parse("github.com/user/repo").unwrap();
        assert_eq!(p.as_str(), "github.com/user/repo");
    }

    #[test]
    fn test_package_path_rejects_traversal() {
        assert!(PackagePath::parse("github.com/../../etc/passwd").is_err());
    }

    #[test]
    fn test_package_path_display() {
        let p = PackagePath::parse("github.com/user/repo").unwrap();
        assert_eq!(format!("{p}"), "github.com/user/repo");
    }

    // --- PackageSpec tests ---

    #[test]
    fn test_package_spec_with_ref() {
        let s = PackageSpec::parse("github.com/user/repo@v1.0").unwrap();
        assert_eq!(s.path.as_str(), "github.com/user/repo");
        assert_eq!(s.git_ref, "v1.0");
    }

    #[test]
    fn test_package_spec_no_ref_defaults_main() {
        let s = PackageSpec::parse("github.com/user/repo").unwrap();
        assert_eq!(s.git_ref, "main");
    }

    // The clone URL is built from the path only; the ref is not part of it.
    #[test]
    fn test_package_spec_clone_url() {
        let s = PackageSpec::parse("github.com/user/repo@v1.0").unwrap();
        assert_eq!(s.clone_url(), "https://github.com/user/repo.git");
    }

    #[test]
    fn test_package_spec_dest_dir() {
        let s = PackageSpec::parse("github.com/user/repo").unwrap();
        let base = PathBuf::from("/home/user/.sema/packages");
        assert_eq!(
            s.dest_dir(&base),
            PathBuf::from("/home/user/.sema/packages/github.com/user/repo")
        );
    }

    // A trailing `@` with nothing after it is an error, not a default to "main".
    #[test]
    fn test_package_spec_rejects_empty_ref() {
        assert!(PackageSpec::parse("github.com/user/repo@").is_err());
    }

    #[test]
    fn test_package_spec_rejects_traversal_in_path() {
        assert!(PackageSpec::parse("github.com/../../etc/passwd@main").is_err());
    }

    // --- parse_entrypoint tests ---

    #[test]
    fn parse_entrypoint_ignores_non_package_table() {
        let dir = temp_packages_dir();
        let toml_content = "[tool]\nentrypoint = \"tool.sema\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(
            result, None,
            "should not pick up entrypoint from [tool] table"
        );
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn parse_entrypoint_reads_from_package_table() {
        let dir = temp_packages_dir();
        let toml_content = "[package]\nentrypoint = \"lib.sema\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(result, Some("lib.sema".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }

    // A top-level key is still found when other tables follow it.
    #[test]
    fn parse_entrypoint_reads_top_level() {
        let dir = temp_packages_dir();
        let toml_content = "entrypoint = \"main.sema\"\n[deps]\nfoo = \"1.0\"\n";
        fs::write(dir.join("sema.toml"), toml_content).unwrap();
        let result = parse_entrypoint(&dir.join("sema.toml"));
        assert_eq!(result, Some("main.sema".to_string()));
        let _ = fs::remove_dir_all(&dir);
    }
}