Skip to main content

shape_vm/
stdlib.rs

1//! Standard library compilation for Shape VM
2//!
3//! This module handles compiling the core stdlib modules at engine initialization.
4//! Core modules are auto-imported and available without explicit imports.
5//! Domain-specific modules (finance, iot, etc.) require explicit imports.
6
7use std::path::Path;
8use std::sync::OnceLock;
9
10use shape_ast::error::{Result, ShapeError};
11use shape_runtime::module_loader::ModuleLoader;
12
13use crate::bytecode::BytecodeProgram;
14use crate::compiler::BytecodeCompiler;
15
/// Process-wide cache holding the outcome of loading the core stdlib.
///
/// Populated at most once, by `compile_core_modules` through `OnceLock::get_or_init`.
/// The stored value is the whole `Result`, so a failed load is cached as well and is
/// not retried on later calls.
static CORE_STDLIB_CACHE: OnceLock<Result<BytecodeProgram>> = OnceLock::new();
17
/// Reports whether verbose tracing of stdlib compilation was requested.
///
/// Reads the `SHAPE_TRACE_STDLIB_COMPILE` environment variable on every call.
/// Returns `true` when its value is `1`, `true`, or `yes`, compared ASCII
/// case-insensitively, so mixed-case spellings such as `True` or `Yes` are
/// accepted in addition to the all-lower and all-upper forms. Returns `false`
/// when the variable is unset, is not valid Unicode, or holds any other value.
fn stdlib_compile_logs_enabled() -> bool {
    // Values that switch tracing on; matched without regard to ASCII case.
    const TRUTHY: [&str; 3] = ["1", "true", "yes"];

    std::env::var("SHAPE_TRACE_STDLIB_COMPILE")
        .map(|value| TRUTHY.iter().any(|flag| value.eq_ignore_ascii_case(flag)))
        .unwrap_or(false)
}
23
/// Pre-compiled core stdlib bytecode (MessagePack-serialized BytecodeProgram),
/// embedded into the binary at build time from `../embedded/core_stdlib.msgpack`.
/// Only present in non-test builds; `load_from_embedded` decodes it.
/// Regenerate with: cargo run -p stdlib-gen
#[cfg(not(test))]
const EMBEDDED_CORE_STDLIB: Option<&[u8]> = Some(include_bytes!("../embedded/core_stdlib.msgpack"));

// Tests always recompile from source to validate compiler changes, so the test
// build carries no embedded artifact and the loader falls through to source.
#[cfg(test)]
const EMBEDDED_CORE_STDLIB: Option<&[u8]> = None;
32
33/// Compile all core stdlib modules into a single BytecodeProgram
34///
35/// The core modules are those in `stdlib/core/` which are auto-imported
36/// and available without explicit import statements.
37///
38/// Uses precompiled embedded bytecode when available, falling back to
39/// source compilation. Set `SHAPE_FORCE_SOURCE_STDLIB=1` to force source.
40///
41/// # Returns
42///
43/// A merged BytecodeProgram containing all core functions, types, and metas.
44pub fn compile_core_modules() -> Result<BytecodeProgram> {
45    CORE_STDLIB_CACHE
46        .get_or_init(load_core_modules_best_effort)
47        .clone()
48}
49
50fn load_core_modules_best_effort() -> Result<BytecodeProgram> {
51    // Env override: force source compilation (for debugging/development)
52    if std::env::var("SHAPE_FORCE_SOURCE_STDLIB").is_ok() {
53        return compile_core_modules_from_source();
54    }
55
56    // Try embedded precompiled artifact first
57    if let Some(bytes) = EMBEDDED_CORE_STDLIB {
58        match load_from_embedded(bytes) {
59            Ok(program) => return Ok(program),
60            Err(e) => {
61                if stdlib_compile_logs_enabled() {
62                    eprintln!(
63                        "  Embedded stdlib deserialization failed: {}, falling back to source",
64                        e
65                    );
66                }
67            }
68        }
69    }
70
71    // Fallback: compile from source
72    compile_core_modules_from_source()
73}
74
75fn load_from_embedded(bytes: &[u8]) -> Result<BytecodeProgram> {
76    let mut program: BytecodeProgram =
77        rmp_serde::from_slice(bytes).map_err(|e| ShapeError::RuntimeError {
78            message: format!("Failed to deserialize embedded stdlib: {}", e),
79            location: None,
80        })?;
81    program.ensure_string_index();
82    Ok(program)
83}
84
85/// Extract top-level binding names from precompiled core bytecode.
86/// Used to seed the compiler with known names without loading AST into persistent context.
87pub fn core_binding_names() -> Vec<String> {
88    match compile_core_modules() {
89        Ok(program) => {
90            let mut names: Vec<String> =
91                program.functions.iter().map(|f| f.name.clone()).collect();
92            for name in &program.module_binding_names {
93                if !names.contains(name) {
94                    names.push(name.clone());
95                }
96            }
97            names
98        }
99        Err(_) => Vec::new(),
100    }
101}
102
103/// Compile core stdlib from source (parse + compile). Used as fallback and for tests.
104///
105/// Each module is compiled independently (preserving its own scope for builtins
106/// and intrinsics), then the bytecodes are merged via `merge_append`.
107pub fn compile_core_modules_from_source() -> Result<BytecodeProgram> {
108    let trace = stdlib_compile_logs_enabled();
109    if trace {
110        eprintln!("  Compiling core stdlib...");
111    }
112    let mut loader = ModuleLoader::new();
113    let core_modules = loader.list_core_stdlib_module_imports()?;
114    if core_modules.is_empty() {
115        return Ok(BytecodeProgram::new());
116    }
117
118    let mut merged = BytecodeProgram::new();
119    for import_path in core_modules {
120        let file_name = import_path.strip_prefix("std.").unwrap_or(&import_path);
121        match loader.load_module(&import_path).and_then(|module| {
122            BytecodeCompiler::compile_module_ast(&module.ast).map(|(program, _)| program)
123        }) {
124            Ok(module_program) => {
125                if trace {
126                    eprintln!("    Compiled {}", file_name);
127                }
128                merged.merge_append(module_program);
129            }
130            Err(e) => {
131                if trace {
132                    eprintln!("    Warning: failed to compile {}: {}", file_name, e);
133                }
134            }
135        }
136    }
137
138    if trace {
139        eprintln!("  Finished core stdlib compilation");
140    }
141    Ok(merged)
142}
143
144/// Compile all Shape files in a directory (recursively) into a single BytecodeProgram.
145/// Each file is compiled independently, then merged via `merge_append`.
146pub fn compile_directory(dir: &Path) -> Result<BytecodeProgram> {
147    let mut merged = BytecodeProgram::new();
148    compile_directory_into(&mut merged, dir)?;
149    Ok(merged)
150}
151
152/// Recursively compile all Shape files in a directory and merge into the given program.
153fn compile_directory_into(program: &mut BytecodeProgram, dir: &Path) -> Result<()> {
154    let entries = std::fs::read_dir(dir).map_err(|e| ShapeError::ModuleError {
155        message: format!("Failed to read directory {:?}: {}", dir, e),
156        module_path: Some(dir.to_path_buf()),
157    })?;
158
159    for entry in entries {
160        let entry = entry.map_err(|e| ShapeError::ModuleError {
161            message: format!("Failed to read directory entry: {}", e),
162            module_path: Some(dir.to_path_buf()),
163        })?;
164
165        let path = entry.path();
166
167        if path.is_dir() {
168            compile_directory_into(program, &path)?;
169        } else if path.extension().and_then(|s| s.to_str()) == Some("shape") {
170            let file_name = path
171                .file_name()
172                .and_then(|s| s.to_str())
173                .unwrap_or("unknown");
174            match compile_file(&path) {
175                Ok(file_program) => {
176                    eprintln!("    Compiled {}", file_name);
177                    program.merge_append(file_program);
178                }
179                Err(e) => {
180                    eprintln!("    Warning: failed to compile {}: {}", file_name, e);
181                }
182            }
183        }
184    }
185
186    Ok(())
187}
188
189/// Compile an in-memory Shape source string into a BytecodeProgram.
190/// Used for extension-bundled Shape code (e.g., `include_str!("duckdb.shape")`).
191pub fn compile_source(filename: &str, source: &str) -> Result<BytecodeProgram> {
192    let program = shape_ast::parser::parse_program(source).map_err(|e| ShapeError::ParseError {
193        message: format!("Failed to parse {}: {}", filename, e),
194        location: None,
195    })?;
196
197    let mut compiler = BytecodeCompiler::new();
198    compiler.set_source_with_file(source, filename);
199    compiler.compile(&program)
200}
201
202/// Compile a single Shape file into a BytecodeProgram
203pub fn compile_file(path: &Path) -> Result<BytecodeProgram> {
204    let source = std::fs::read_to_string(path).map_err(|e| ShapeError::ModuleError {
205        message: format!("Failed to read file {:?}: {}", path, e),
206        module_path: Some(path.to_path_buf()),
207    })?;
208
209    let program =
210        shape_ast::parser::parse_program(&source).map_err(|e| ShapeError::ParseError {
211            message: format!("Failed to parse {:?}: {}", path, e),
212            location: None,
213        })?;
214
215    let mut compiler = BytecodeCompiler::new();
216    compiler.set_source_with_file(&source, &path.to_string_lossy());
217    compiler.compile(&program)
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// The core bytecode must expose the `Snapshot` enum schema with its
    /// `Hash` and `Resumed` variants.
    #[test]
    fn test_core_bytecode_has_snapshot_schema() {
        let core = compile_core_modules().expect("Core modules should compile");
        let snapshot = core
            .type_schema_registry
            .get("Snapshot")
            .expect("Core bytecode should contain Snapshot enum schema");
        let info = snapshot
            .get_enum_info()
            .expect("Snapshot should be an enum");
        assert!(
            info.variant_by_name("Hash").is_some(),
            "Snapshot should have Hash variant"
        );
        assert!(
            info.variant_by_name("Resumed").is_some(),
            "Snapshot should have Resumed variant"
        );
    }

    /// `Queryable` trait methods on `Table` must resolve to their dispatch symbols.
    #[test]
    fn test_core_bytecode_registers_queryable_trait_dispatch_symbols() {
        let core = compile_core_modules().expect("Core modules should compile");
        let filter = core.lookup_trait_method_symbol("Queryable", "Table", None, "filter");
        let map = core.lookup_trait_method_symbol("Queryable", "Table", None, "map");
        let execute = core.lookup_trait_method_symbol("Queryable", "Table", None, "execute");

        assert_eq!(filter, Some("Table::filter"));
        assert_eq!(map, Some("Table::map"));
        assert_eq!(execute, Some("Table::execute"));
    }

    /// Compiling a directory with no Shape files succeeds.
    #[test]
    fn test_compile_empty_directory() {
        // Use a per-process directory name so concurrent test runs on the same
        // machine cannot create or delete each other's fixture.
        let temp_dir =
            std::env::temp_dir().join(format!("shape_test_empty_{}", std::process::id()));
        std::fs::create_dir_all(&temp_dir).expect("temp directory should be creatable");

        let result = compile_directory(&temp_dir);

        // Clean up before asserting so a failing assertion does not leak the directory.
        let _ = std::fs::remove_dir_all(&temp_dir);

        let program = result.expect("compiling an empty directory should succeed");
        // Either no instructions were emitted at all, or the stream ends in Halt.
        assert!(
            program.instructions.is_empty()
                || program.instructions.last().map(|i| i.opcode)
                    == Some(crate::bytecode::OpCode::Halt)
        );
    }

    /// A single function definition compiles and shows up by name.
    #[test]
    fn test_compile_source_simple_function() {
        let source = r#"
            fn double(x) { x * 2 }
        "#;
        let result = compile_source("test.shape", source);
        assert!(
            result.is_ok(),
            "compile_source should succeed: {:?}",
            result.err()
        );

        let program = result.unwrap();
        assert!(
            !program.functions.is_empty(),
            "Should have at least one function"
        );
        assert!(
            program.functions.iter().any(|f| f.name == "double"),
            "Should contain 'double' function"
        );
    }

    /// Invalid syntax is reported as an error rather than compiled.
    #[test]
    fn test_compile_source_parse_error() {
        let source = "fn broken(( { }";
        let result = compile_source("broken.shape", source);
        assert!(result.is_err(), "Should fail on invalid syntax");
    }

    /// Enum definitions are accepted by `compile_source`.
    #[test]
    fn test_compile_source_enum_definition() {
        let source = r#"
            enum Direction {
                Up,
                Down,
                Left,
                Right
            }
        "#;
        let result = compile_source("enums.shape", source);
        assert!(
            result.is_ok(),
            "compile_source should handle enums: {:?}",
            result.err()
        );
    }

    /// Compile from source, serialize, deserialize, and verify key properties match.
    #[test]
    fn test_embedded_stdlib_round_trip() {
        let source = compile_core_modules_from_source().expect("Source compilation should succeed");
        let bytes = rmp_serde::to_vec(&source).expect("Serialization should succeed");
        let deserialized = load_from_embedded(&bytes).expect("Deserialization should succeed");

        assert_eq!(
            source.functions.len(),
            deserialized.functions.len(),
            "Function count should match after round-trip"
        );
        assert_eq!(
            source.instructions.len(),
            deserialized.instructions.len(),
            "Instruction count should match after round-trip"
        );
        assert_eq!(
            source.constants.len(),
            deserialized.constants.len(),
            "Constant count should match after round-trip"
        );
        assert!(
            !deserialized.functions.is_empty(),
            "Deserialized should have functions"
        );
    }

    /// Every function's `entry_point + body_length` must stay inside the
    /// merged instruction stream.
    #[test]
    fn test_body_length_within_bounds() {
        let program = compile_core_modules_from_source().expect("compile");
        let total = program.instructions.len();
        let mut bad = Vec::new();
        for (i, f) in program.functions.iter().enumerate() {
            let end = f.entry_point + f.body_length;
            if end > total {
                bad.push(format!(
                    "func[{}] '{}' entry={} body_length={} end={} exceeds total={}",
                    i, f.name, f.entry_point, f.body_length, end, total
                ));
            }
        }
        assert!(bad.is_empty(), "Functions with OOB body_length:\n{}", bad.join("\n"));
    }

    /// The core stdlib contributes at least one binding name.
    #[test]
    fn test_core_binding_names() {
        let names = core_binding_names();
        assert!(!names.is_empty(), "Should have binding names from stdlib");
    }
}