Skip to main content

shape_vm/
bundle_compiler.rs

1//! Bundle compiler for producing distributable .shapec packages
2//!
3//! Takes a ProjectRoot and compiles all .shape files into a PackageBundle.
4
5use crate::bytecode::BytecodeProgram;
6use crate::compiler::BytecodeCompiler;
7use sha2::{Digest, Sha256};
8use shape_ast::parser::parse_program;
9use shape_runtime::module_manifest::ModuleManifest;
10use shape_runtime::package_bundle::{
11    BundleMetadata, BundledModule, BundledNativeDependencyScope, PackageBundle,
12};
13use shape_runtime::project::ProjectRoot;
14use std::collections::{HashMap, HashSet, VecDeque};
15use std::path::{Path, PathBuf};
16use std::time::SystemTime;
17
18/// Compiles an entire Shape project into a PackageBundle.
19pub struct BundleCompiler;
20
21impl BundleCompiler {
22    /// Compile all .shape files in a project to a PackageBundle.
23    pub fn compile(project: &ProjectRoot) -> Result<PackageBundle, String> {
24        let root = &project.root_path;
25
26        // 1. Discover all .shape files
27        let shape_files = discover_shape_files(root, project)?;
28
29        if shape_files.is_empty() {
30            return Err("No .shape files found in project".to_string());
31        }
32
33        // 2. Compile each file
34        let mut modules = Vec::new();
35        let mut all_sources = String::new();
36
37        for (file_path, module_path) in &shape_files {
38            let source = std::fs::read_to_string(file_path)
39                .map_err(|e| format!("Failed to read '{}': {}", file_path.display(), e))?;
40
41            // Hash individual source
42            let mut hasher = Sha256::new();
43            hasher.update(source.as_bytes());
44            let source_hash = format!("{:x}", hasher.finalize());
45
46            // Accumulate for combined hash
47            all_sources.push_str(&source);
48
49            // Parse
50            let ast = parse_program(&source)
51                .map_err(|e| format!("Failed to parse '{}': {}", file_path.display(), e))?;
52
53            // Collect export names from AST
54            let export_names = collect_export_names(&ast);
55
56            // Compile to bytecode (BytecodeCompiler::compile consumes self)
57            let compiler = BytecodeCompiler::new();
58            let bytecode = compiler
59                .compile(&ast)
60                .map_err(|e| format!("Failed to compile '{}': {}", file_path.display(), e))?;
61
62            // Serialize bytecode to MessagePack
63            let bytecode_bytes = rmp_serde::to_vec(&bytecode).map_err(|e| {
64                format!(
65                    "Failed to serialize bytecode for '{}': {}",
66                    file_path.display(),
67                    e
68                )
69            })?;
70
71            modules.push(BundledModule {
72                module_path: module_path.clone(),
73                bytecode_bytes,
74                export_names,
75                source_hash,
76            });
77        }
78
79        // 3. Compute combined source hash
80        let mut hasher = Sha256::new();
81        hasher.update(all_sources.as_bytes());
82        let source_hash = format!("{:x}", hasher.finalize());
83
84        // 4. Collect dependency versions
85        let mut dependencies = HashMap::new();
86        for (name, spec) in &project.config.dependencies {
87            let version = match spec {
88                shape_runtime::project::DependencySpec::Version(v) => v.clone(),
89                shape_runtime::project::DependencySpec::Detailed(d) => {
90                    d.version.clone().unwrap_or_else(|| "local".to_string())
91                }
92            };
93            dependencies.insert(name.clone(), version);
94        }
95
96        let native_dependency_scopes = collect_native_dependency_scopes(root, &project.config)
97            .map_err(|e| {
98                format!(
99                    "Failed to collect transitive native dependency scopes for bundle: {}",
100                    e
101                )
102            })?;
103        let native_portable = native_dependency_scopes
104            .iter()
105            .all(native_dependency_scope_is_portable);
106
107        // 5. Build metadata
108        let built_at = SystemTime::now()
109            .duration_since(SystemTime::UNIX_EPOCH)
110            .map(|d| d.as_secs())
111            .unwrap_or(0);
112
113        let metadata = BundleMetadata {
114            name: project.config.project.name.clone(),
115            version: project.config.project.version.clone(),
116            compiler_version: env!("CARGO_PKG_VERSION").to_string(),
117            source_hash,
118            bundle_kind: "portable-bytecode".to_string(),
119            build_host: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS),
120            native_portable,
121            entry_module: project
122                .config
123                .project
124                .entry
125                .as_ref()
126                .map(|e| path_to_module_path(Path::new(e), root)),
127            built_at,
128        };
129
130        // 6. Extract content-addressed blobs and build manifests
131        let mut blob_store: HashMap<[u8; 32], Vec<u8>> = HashMap::new();
132        let mut manifests: Vec<ModuleManifest> = Vec::new();
133
134        for bundled_module in &modules {
135            // Deserialize the bytecode to access content_addressed metadata
136            let program: BytecodeProgram =
137                match rmp_serde::from_slice(&bundled_module.bytecode_bytes) {
138                    Ok(p) => p,
139                    Err(_) => continue, // Skip if deserialization fails
140                };
141
142            if let Some(ref ca) = program.content_addressed {
143                // Extract blobs into blob_store
144                for (hash, blob) in &ca.function_store {
145                    if let Ok(blob_bytes) = rmp_serde::to_vec(blob) {
146                        blob_store.insert(hash.0, blob_bytes);
147                    }
148                }
149
150                // Build manifest for this module
151                let mut manifest = ModuleManifest::new(
152                    bundled_module.module_path.clone(),
153                    metadata.version.clone(),
154                );
155
156                // Map export names to their function hashes
157                for export_name in &bundled_module.export_names {
158                    for (hash, blob) in &ca.function_store {
159                        if blob.name == *export_name {
160                            manifest.add_export(export_name.clone(), hash.0);
161                            break;
162                        }
163                    }
164                }
165
166                // Collect type schemas referenced by function blobs
167                let mut seen_schemas = std::collections::HashSet::new();
168                for (_hash, blob) in &ca.function_store {
169                    for schema_name in &blob.type_schemas {
170                        if seen_schemas.insert(schema_name.clone()) {
171                            use sha2::{Digest, Sha256};
172                            let schema_hash = Sha256::digest(schema_name.as_bytes());
173                            let mut hash_bytes = [0u8; 32];
174                            hash_bytes.copy_from_slice(&schema_hash);
175                            manifest.add_type_schema(schema_name.clone(), hash_bytes);
176                        }
177                    }
178                }
179
180                // Build transitive dependency closure for each export
181                for (_export_name, export_hash) in &manifest.exports {
182                    let mut closure = Vec::new();
183                    let mut visited = std::collections::HashSet::new();
184                    let mut queue = vec![*export_hash];
185                    while let Some(h) = queue.pop() {
186                        if !visited.insert(h) {
187                            continue;
188                        }
189                        if let Some(blob) = ca.function_store.get(&crate::bytecode::FunctionHash(h))
190                        {
191                            for dep in &blob.dependencies {
192                                closure.push(dep.0);
193                                queue.push(dep.0);
194                            }
195                        }
196                    }
197                    closure.sort();
198                    closure.dedup();
199                    manifest.dependency_closure.insert(*export_hash, closure);
200                }
201
202                manifest.finalize();
203                manifests.push(manifest);
204            }
205        }
206
207        Ok(PackageBundle {
208            metadata,
209            modules,
210            dependencies,
211            blob_store,
212            manifests,
213            native_dependency_scopes,
214        })
215    }
216}
217
218fn normalize_package_identity(
219    project: &shape_runtime::project::ShapeProject,
220    fallback_name: &str,
221    fallback_version: &str,
222) -> (String, String, String) {
223    let package_name = if project.project.name.trim().is_empty() {
224        fallback_name.to_string()
225    } else {
226        project.project.name.trim().to_string()
227    };
228    let package_version = if project.project.version.trim().is_empty() {
229        fallback_version.to_string()
230    } else {
231        project.project.version.trim().to_string()
232    };
233    let package_key = format!("{package_name}@{package_version}");
234    (package_name, package_version, package_key)
235}
236
237fn merge_native_scope(
238    scopes: &mut HashMap<String, BundledNativeDependencyScope>,
239    scope: BundledNativeDependencyScope,
240) {
241    if let Some(existing) = scopes.get_mut(&scope.package_key) {
242        existing.dependencies.extend(scope.dependencies);
243        return;
244    }
245    scopes.insert(scope.package_key.clone(), scope);
246}
247
248fn collect_native_dependency_scopes(
249    root_path: &Path,
250    project: &shape_runtime::project::ShapeProject,
251) -> Result<Vec<BundledNativeDependencyScope>, String> {
252    let fallback_root_name = root_path
253        .file_name()
254        .and_then(|name| name.to_str())
255        .filter(|name| !name.is_empty())
256        .unwrap_or("root");
257    let (root_name, root_version, root_key) =
258        normalize_package_identity(project, fallback_root_name, "0.0.0");
259
260    let mut queue: VecDeque<(
261        PathBuf,
262        shape_runtime::project::ShapeProject,
263        String,
264        String,
265        String,
266    )> = VecDeque::new();
267    queue.push_back((
268        root_path.to_path_buf(),
269        project.clone(),
270        root_name,
271        root_version,
272        root_key,
273    ));
274
275    let mut scopes_by_key: HashMap<String, BundledNativeDependencyScope> = HashMap::new();
276    let mut visited_roots: HashSet<PathBuf> = HashSet::new();
277
278    while let Some((package_root, package, package_name, package_version, package_key)) =
279        queue.pop_front()
280    {
281        let canonical_root = package_root
282            .canonicalize()
283            .unwrap_or_else(|_| package_root.clone());
284        if !visited_roots.insert(canonical_root.clone()) {
285            continue;
286        }
287
288        let native_deps = package.native_dependencies().map_err(|e| {
289            format!(
290                "invalid [native-dependencies] in package '{}': {}",
291                package_name, e
292            )
293        })?;
294        if !native_deps.is_empty() {
295            merge_native_scope(
296                &mut scopes_by_key,
297                BundledNativeDependencyScope {
298                    package_name: package_name.clone(),
299                    package_version: package_version.clone(),
300                    package_key: package_key.clone(),
301                    dependencies: native_deps,
302                },
303            );
304        }
305
306        if package.dependencies.is_empty() {
307            continue;
308        }
309
310        let Some(resolver) =
311            shape_runtime::dependency_resolver::DependencyResolver::new(canonical_root.clone())
312        else {
313            continue;
314        };
315        let resolved = resolver.resolve(&package.dependencies).map_err(|e| {
316            format!(
317                "failed to resolve dependencies for package '{}': {}",
318                package_name, e
319            )
320        })?;
321
322        for resolved_dep in resolved {
323            if resolved_dep
324                .path
325                .extension()
326                .is_some_and(|ext| ext == "shapec")
327            {
328                let bundle = shape_runtime::package_bundle::PackageBundle::read_from_file(
329                    &resolved_dep.path,
330                )
331                .map_err(|e| {
332                    format!(
333                        "failed to read dependency bundle '{}': {}",
334                        resolved_dep.path.display(),
335                        e
336                    )
337                })?;
338                for scope in bundle.native_dependency_scopes {
339                    merge_native_scope(&mut scopes_by_key, scope);
340                }
341                continue;
342            }
343
344            let dep_root = resolved_dep.path;
345            let dep_toml = dep_root.join("shape.toml");
346            let dep_source = match std::fs::read_to_string(&dep_toml) {
347                Ok(content) => content,
348                Err(_) => continue,
349            };
350            let dep_project = shape_runtime::project::parse_shape_project_toml(&dep_source)
351                .map_err(|err| {
352                    format!(
353                        "failed to parse dependency project '{}': {}",
354                        dep_toml.display(),
355                        err
356                    )
357                })?;
358            let (dep_name, dep_version, dep_key) =
359                normalize_package_identity(&dep_project, &resolved_dep.name, &resolved_dep.version);
360            queue.push_back((dep_root, dep_project, dep_name, dep_version, dep_key));
361        }
362    }
363
364    let mut scopes: Vec<_> = scopes_by_key.into_values().collect();
365    scopes.sort_by(|a, b| a.package_key.cmp(&b.package_key));
366    Ok(scopes)
367}
368
369fn native_spec_is_portable(spec: &shape_runtime::project::NativeDependencySpec) -> bool {
370    use shape_runtime::project::{NativeDependencyProvider, NativeDependencySpec};
371
372    match spec {
373        NativeDependencySpec::Simple(value) => !is_path_like_native_spec(value),
374        NativeDependencySpec::Detailed(detail) => {
375            if matches!(
376                spec.provider_for_host(),
377                NativeDependencyProvider::Path | NativeDependencyProvider::Vendored
378            ) {
379                return false;
380            }
381            for value in [&detail.path, &detail.linux, &detail.macos, &detail.windows] {
382                if value.as_deref().is_some_and(is_path_like_native_spec) {
383                    return false;
384                }
385            }
386            true
387        }
388    }
389}
390
391fn native_dependency_scope_is_portable(scope: &BundledNativeDependencyScope) -> bool {
392    scope.dependencies.values().all(native_spec_is_portable)
393}
394
/// Heuristically decide whether a native dependency value names a filesystem
/// location rather than a bare library name.
///
/// A value is path-like when it contains a `/` or `\` separator (which also
/// covers `./` and `../` prefixes), when its second byte is `:` (drive-letter
/// style), or when the platform considers it an absolute path.
fn is_path_like_native_spec(spec: &str) -> bool {
    let has_separator = spec.contains('/') || spec.contains('\\');
    if has_separator {
        return true;
    }

    let has_drive_colon = spec.as_bytes().get(1) == Some(&b':');
    has_drive_colon || Path::new(spec).is_absolute()
}
404
405/// Discover all .shape files in the project, returning (file_path, module_path) pairs.
406fn discover_shape_files(
407    root: &Path,
408    project: &ProjectRoot,
409) -> Result<Vec<(PathBuf, String)>, String> {
410    let mut files = Vec::new();
411
412    // Search in project root
413    collect_shape_files(root, root, &mut files)?;
414
415    // Search in configured module paths
416    for module_path in project.resolved_module_paths() {
417        if module_path.exists() && module_path.is_dir() {
418            collect_shape_files(&module_path, &module_path, &mut files)?;
419        }
420    }
421
422    // Deduplicate by file path
423    files.sort_by(|a, b| a.0.cmp(&b.0));
424    files.dedup_by(|a, b| a.0 == b.0);
425
426    Ok(files)
427}
428
429/// Recursively collect .shape files from a directory.
430fn collect_shape_files(
431    dir: &Path,
432    base: &Path,
433    files: &mut Vec<(PathBuf, String)>,
434) -> Result<(), String> {
435    let entries = std::fs::read_dir(dir)
436        .map_err(|e| format!("Failed to read directory '{}': {}", dir.display(), e))?;
437
438    for entry in entries {
439        let entry = entry.map_err(|e| format!("Failed to read dir entry: {}", e))?;
440        let path = entry.path();
441        let file_name = entry.file_name().to_string_lossy().to_string();
442
443        // Skip hidden dirs and common non-source dirs
444        if file_name.starts_with('.') || file_name == "target" || file_name == "node_modules" {
445            continue;
446        }
447
448        if path.is_dir() {
449            collect_shape_files(&path, base, files)?;
450        } else if path.extension().and_then(|e| e.to_str()) == Some("shape") {
451            let module_path = path_to_module_path(&path, base);
452            files.push((path, module_path));
453        }
454    }
455
456    Ok(())
457}
458
/// Turn a source file path into a `::`-separated module path.
///
/// The path is taken relative to `base` when it lies under it (otherwise it
/// is used as given), the extension is removed, and only the normal path
/// components are kept. A trailing `index` component is dropped, so an
/// `index.shape` file stands for its parent directory.
///
/// Examples:
/// - `src/main.shape` -> `src::main`
/// - `utils/helpers.shape` -> `utils::helpers`
/// - `utils/index.shape` -> `utils`
/// - `index.shape` (at the base) -> `` (empty string)
fn path_to_module_path(path: &Path, base: &Path) -> String {
    let stem_path = path.strip_prefix(base).unwrap_or(path).with_extension("");

    let mut segments: Vec<&str> = Vec::new();
    for component in stem_path.components() {
        if let std::path::Component::Normal(segment) = component {
            // Components that are not valid UTF-8 are left out.
            if let Some(text) = segment.to_str() {
                segments.push(text);
            }
        }
    }

    // index.shape names the enclosing directory (or the root when alone).
    if segments.last() == Some(&"index") {
        segments.pop();
    }

    segments.join("::")
}
487
488/// Collect export names from a parsed AST.
489fn collect_export_names(program: &shape_ast::ast::Program) -> Vec<String> {
490    let mut names = Vec::new();
491
492    for item in &program.items {
493        match item {
494            shape_ast::ast::Item::Export(export, _) => match &export.item {
495                shape_ast::ast::ExportItem::Function(func) => {
496                    names.push(func.name.clone());
497                }
498                shape_ast::ast::ExportItem::Named(specs) => {
499                    for spec in specs {
500                        names.push(spec.alias.clone().unwrap_or_else(|| spec.name.clone()));
501                    }
502                }
503                shape_ast::ast::ExportItem::TypeAlias(alias) => {
504                    names.push(alias.name.clone());
505                }
506                shape_ast::ast::ExportItem::Enum(e) => {
507                    names.push(e.name.clone());
508                }
509                shape_ast::ast::ExportItem::Struct(s) => {
510                    names.push(s.name.clone());
511                }
512                shape_ast::ast::ExportItem::Interface(i) => {
513                    names.push(i.name.clone());
514                }
515                shape_ast::ast::ExportItem::Trait(t) => {
516                    names.push(t.name.clone());
517                }
518                shape_ast::ast::ExportItem::ForeignFunction(f) => {
519                    names.push(f.name.clone());
520                }
521            },
522            _ => {}
523        }
524    }
525
526    names.sort();
527    names.dedup();
528    names
529}
530
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the first well-known system library name the host's dynamic
    /// loader can open, or `None` when none of the candidates load.
    fn discover_system_library_alias() -> Option<String> {
        const CANDIDATES: [&str; 5] = [
            "libm.so.6",
            "libc.so.6",
            "libSystem.B.dylib",
            "kernel32.dll",
            "ucrtbase.dll",
        ];
        CANDIDATES
            .iter()
            .copied()
            // NOTE(review): loading a library runs its initializers; these are
            // standard system libraries, presumably safe to load - confirm.
            .find(|name| unsafe { libloading::Library::new(*name) }.is_ok())
            .map(|name| name.to_string())
    }

    #[test]
    fn test_path_to_module_path_basic() {
        let base = Path::new("/project");

        let top_level = path_to_module_path(Path::new("/project/main.shape"), base);
        assert_eq!(top_level, "main");

        let nested = path_to_module_path(Path::new("/project/utils/helpers.shape"), base);
        assert_eq!(nested, "utils::helpers");
    }

    #[test]
    fn test_path_to_module_path_index() {
        let base = Path::new("/project");

        let nested_index = path_to_module_path(Path::new("/project/utils/index.shape"), base);
        assert_eq!(nested_index, "utils");

        let root_index = path_to_module_path(Path::new("/project/index.shape"), base);
        assert_eq!(root_index, "");
    }

    #[test]
    fn test_compile_temp_project() {
        let workspace = tempfile::tempdir().expect("temp dir");
        let root = workspace.path();

        // Project manifest
        let manifest = r#"
[project]
name = "test-bundle"
version = "0.1.0"
"#;
        std::fs::write(root.join("shape.toml"), manifest).expect("write shape.toml");

        // Source files: one at the root, one in a subdirectory
        std::fs::write(root.join("main.shape"), "pub fn run() { 42 }").expect("write main");
        std::fs::create_dir_all(root.join("utils")).expect("create utils dir");
        std::fs::write(root.join("utils/helpers.shape"), "pub fn helper() { 1 }")
            .expect("write helpers");

        let project =
            shape_runtime::project::find_project_root(root).expect("should find project root");
        let bundle = BundleCompiler::compile(&project).expect("compilation should succeed");

        assert_eq!(bundle.metadata.name, "test-bundle");
        assert_eq!(bundle.metadata.version, "0.1.0");

        let module_count = bundle.modules.len();
        assert!(
            module_count >= 2,
            "should have at least 2 modules, got {}",
            module_count
        );

        let has_module = |wanted: &str| bundle.modules.iter().any(|m| m.module_path == wanted);
        assert!(has_module("main"), "should have main module");
        assert!(
            has_module("utils::helpers"),
            "should have utils::helpers module"
        );
    }

    #[test]
    fn test_compile_embeds_transitive_native_scopes_from_shapec_dependencies() {
        let alias = match discover_system_library_alias() {
            Some(alias) => alias,
            // Host test image does not expose a known system alias.
            None => return,
        };

        // True when the bundle carries leaf's scope including its duckdb entry.
        let carries_leaf_scope = |bundle: &PackageBundle| {
            bundle.native_dependency_scopes.iter().any(|scope| {
                scope.package_key == "leaf@1.2.3" && scope.dependencies.contains_key("duckdb")
            })
        };

        let workspace = tempfile::tempdir().expect("temp dir");
        let leaf_dir = workspace.path().join("leaf");
        let mid_dir = workspace.path().join("mid");
        std::fs::create_dir_all(&leaf_dir).expect("create leaf dir");
        std::fs::create_dir_all(&mid_dir).expect("create mid dir");

        // Leaf package: declares a system-provided native dependency.
        let leaf_manifest = format!(
            r#"
[project]
name = "leaf"
version = "1.2.3"

[native-dependencies]
duckdb = {{ provider = "system", version = "1.0.0", linux = "{alias}", macos = "{alias}", windows = "{alias}" }}
"#
        );
        std::fs::write(leaf_dir.join("shape.toml"), leaf_manifest)
            .expect("write leaf shape.toml");
        std::fs::write(leaf_dir.join("main.shape"), "pub fn leaf_marker() { 1 }")
            .expect("write leaf source");

        let leaf_project = shape_runtime::project::find_project_root(&leaf_dir)
            .expect("leaf project root should resolve");
        let leaf_bundle = BundleCompiler::compile(&leaf_project).expect("compile leaf bundle");
        let leaf_bundle_path = workspace.path().join("leaf.shapec");
        leaf_bundle
            .write_to_file(&leaf_bundle_path)
            .expect("write leaf bundle");
        assert!(
            carries_leaf_scope(&leaf_bundle),
            "leaf bundle should embed its native dependency scope"
        );

        // Mid package: depends on the pre-built leaf bundle.
        let mid_manifest = r#"
[project]
name = "mid"
version = "0.4.0"

[dependencies]
leaf = { path = "../leaf.shapec" }
"#;
        std::fs::write(mid_dir.join("shape.toml"), mid_manifest).expect("write mid shape.toml");
        std::fs::write(mid_dir.join("main.shape"), "pub fn mid_marker() { 2 }")
            .expect("write mid source");

        let mid_project =
            shape_runtime::project::find_project_root(&mid_dir).expect("mid project root");
        let mid_bundle = BundleCompiler::compile(&mid_project).expect("compile mid bundle");

        assert!(
            carries_leaf_scope(&mid_bundle),
            "mid bundle should preserve transitive native scopes from leaf.shapec"
        );
    }
}