Skip to main content

shape_vm/
stdlib.rs

1//! Standard library compilation for Shape VM
2//!
3//! This module handles compiling the core stdlib modules at engine initialization.
4//! Core modules are auto-imported and available without explicit imports.
5//! Domain-specific modules (finance, iot, etc.) require explicit imports.
6
7use std::path::Path;
8use std::sync::OnceLock;
9
10use shape_ast::error::{Result, ShapeError};
11use shape_runtime::module_loader::ModuleLoader;
12
13use crate::bytecode::BytecodeProgram;
14use crate::compiler::BytecodeCompiler;
15
16static CORE_STDLIB_CACHE: OnceLock<Result<BytecodeProgram>> = OnceLock::new();
17
/// Report whether stdlib compilation tracing was requested via the
/// `SHAPE_TRACE_STDLIB_COMPILE` environment variable.
///
/// Only the exact spellings listed in `ENABLED_VALUES` turn tracing on; an
/// unset variable, a non-Unicode value, or any other text leaves it off.
fn stdlib_compile_logs_enabled() -> bool {
    const ENABLED_VALUES: [&str; 5] = ["1", "true", "TRUE", "yes", "YES"];

    match std::env::var("SHAPE_TRACE_STDLIB_COMPILE") {
        Ok(value) => ENABLED_VALUES.contains(&value.as_str()),
        Err(_) => false,
    }
}
23
24/// Pre-compiled core stdlib bytecode (MessagePack-serialized BytecodeProgram).
25/// Regenerate with: cargo run -p stdlib-gen
26#[cfg(not(test))]
27const EMBEDDED_CORE_STDLIB: Option<&[u8]> = Some(include_bytes!("../embedded/core_stdlib.msgpack"));
28
29// Tests always recompile from source to validate compiler changes
30#[cfg(test)]
31const EMBEDDED_CORE_STDLIB: Option<&[u8]> = None;
32
33/// Compile all core stdlib modules into a single BytecodeProgram
34///
35/// The core modules are those in `stdlib/core/` which are auto-imported
36/// and available without explicit import statements.
37///
38/// Uses precompiled embedded bytecode when available, falling back to
39/// source compilation. Set `SHAPE_FORCE_SOURCE_STDLIB=1` to force source.
40///
41/// # Returns
42///
43/// A merged BytecodeProgram containing all core functions, types, and metas.
44pub fn compile_core_modules() -> Result<BytecodeProgram> {
45    CORE_STDLIB_CACHE
46        .get_or_init(load_core_modules_best_effort)
47        .clone()
48}
49
50fn load_core_modules_best_effort() -> Result<BytecodeProgram> {
51    // Env override: force source compilation (for debugging/development)
52    if std::env::var("SHAPE_FORCE_SOURCE_STDLIB").is_ok() {
53        return compile_core_modules_from_source();
54    }
55
56    // Try embedded precompiled artifact first
57    if let Some(bytes) = EMBEDDED_CORE_STDLIB {
58        match load_from_embedded(bytes) {
59            Ok(program) => return Ok(program),
60            Err(e) => {
61                if stdlib_compile_logs_enabled() {
62                    eprintln!(
63                        "  Embedded stdlib deserialization failed: {}, falling back to source",
64                        e
65                    );
66                }
67            }
68        }
69    }
70
71    // Fallback: compile from source
72    compile_core_modules_from_source()
73}
74
75fn load_from_embedded(bytes: &[u8]) -> Result<BytecodeProgram> {
76    let mut program: BytecodeProgram =
77        rmp_serde::from_slice(bytes).map_err(|e| ShapeError::RuntimeError {
78            message: format!("Failed to deserialize embedded stdlib: {}", e),
79            location: None,
80        })?;
81    program.ensure_string_index();
82    Ok(program)
83}
84
85/// Extract top-level binding names from precompiled core bytecode.
86/// Used to seed the compiler with known names without loading AST into persistent context.
87pub fn core_binding_names() -> Vec<String> {
88    match compile_core_modules() {
89        Ok(program) => {
90            let mut names: Vec<String> = program.functions.iter().map(|f| f.name.clone()).collect();
91            for name in &program.module_binding_names {
92                if !names.contains(name) {
93                    names.push(name.clone());
94                }
95            }
96            names
97        }
98        Err(_) => Vec::new(),
99    }
100}
101
102/// Compile core stdlib from source (parse + compile). Used as fallback and for tests.
103///
104/// Each module is compiled independently (preserving its own scope for builtins
105/// and intrinsics), then the bytecodes are merged via `merge_append`.
106pub fn compile_core_modules_from_source() -> Result<BytecodeProgram> {
107    let trace = stdlib_compile_logs_enabled();
108    if trace {
109        eprintln!("  Compiling core stdlib...");
110    }
111    let mut loader = ModuleLoader::new();
112    let core_modules = loader.list_core_stdlib_module_imports()?;
113    if core_modules.is_empty() {
114        return Ok(BytecodeProgram::new());
115    }
116
117    let mut merged = BytecodeProgram::new();
118    for import_path in core_modules {
119        let file_name = import_path.strip_prefix("std.").unwrap_or(&import_path);
120        match loader.load_module(&import_path).and_then(|module| {
121            BytecodeCompiler::compile_module_ast(&module.ast).map(|(program, _)| program)
122        }) {
123            Ok(module_program) => {
124                if trace {
125                    eprintln!("    Compiled {}", file_name);
126                }
127                merged.merge_append(module_program);
128            }
129            Err(e) => {
130                if trace {
131                    eprintln!("    Warning: failed to compile {}: {}", file_name, e);
132                }
133            }
134        }
135    }
136
137    if trace {
138        eprintln!("  Finished core stdlib compilation");
139    }
140    Ok(merged)
141}
142
143/// Compile all Shape files in a directory (recursively) into a single BytecodeProgram.
144/// Each file is compiled independently, then merged via `merge_append`.
145pub fn compile_directory(dir: &Path) -> Result<BytecodeProgram> {
146    let mut merged = BytecodeProgram::new();
147    compile_directory_into(&mut merged, dir)?;
148    Ok(merged)
149}
150
151/// Recursively compile all Shape files in a directory and merge into the given program.
152fn compile_directory_into(program: &mut BytecodeProgram, dir: &Path) -> Result<()> {
153    let entries = std::fs::read_dir(dir).map_err(|e| ShapeError::ModuleError {
154        message: format!("Failed to read directory {:?}: {}", dir, e),
155        module_path: Some(dir.to_path_buf()),
156    })?;
157
158    for entry in entries {
159        let entry = entry.map_err(|e| ShapeError::ModuleError {
160            message: format!("Failed to read directory entry: {}", e),
161            module_path: Some(dir.to_path_buf()),
162        })?;
163
164        let path = entry.path();
165
166        if path.is_dir() {
167            compile_directory_into(program, &path)?;
168        } else if path.extension().and_then(|s| s.to_str()) == Some("shape") {
169            let file_name = path
170                .file_name()
171                .and_then(|s| s.to_str())
172                .unwrap_or("unknown");
173            match compile_file(&path) {
174                Ok(file_program) => {
175                    eprintln!("    Compiled {}", file_name);
176                    program.merge_append(file_program);
177                }
178                Err(e) => {
179                    eprintln!("    Warning: failed to compile {}: {}", file_name, e);
180                }
181            }
182        }
183    }
184
185    Ok(())
186}
187
188/// Compile an in-memory Shape source string into a BytecodeProgram.
189/// Used for extension-bundled Shape code (e.g., `include_str!("duckdb.shape")`).
190pub fn compile_source(filename: &str, source: &str) -> Result<BytecodeProgram> {
191    let program = shape_ast::parser::parse_program(source).map_err(|e| ShapeError::ParseError {
192        message: format!("Failed to parse {}: {}", filename, e),
193        location: None,
194    })?;
195
196    let mut compiler = BytecodeCompiler::new();
197    compiler.set_source_with_file(source, filename);
198    compiler.compile(&program)
199}
200
201/// Compile a single Shape file into a BytecodeProgram
202pub fn compile_file(path: &Path) -> Result<BytecodeProgram> {
203    let source = std::fs::read_to_string(path).map_err(|e| ShapeError::ModuleError {
204        message: format!("Failed to read file {:?}: {}", path, e),
205        module_path: Some(path.to_path_buf()),
206    })?;
207
208    let program =
209        shape_ast::parser::parse_program(&source).map_err(|e| ShapeError::ParseError {
210            message: format!("Failed to parse {:?}: {}", path, e),
211            location: None,
212        })?;
213
214    let mut compiler = BytecodeCompiler::new();
215    compiler.set_source_with_file(&source, &path.to_string_lossy());
216    compiler.compile(&program)
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    // The core bytecode must carry the `Snapshot` enum schema with its `Hash`
    // and `Resumed` variants.
    #[test]
    fn test_core_bytecode_has_snapshot_schema() {
        let core = compile_core_modules().expect("Core modules should compile");
        let snapshot = core
            .type_schema_registry
            .get("Snapshot")
            .expect("Core bytecode should contain Snapshot enum schema");
        let info = snapshot
            .get_enum_info()
            .expect("Snapshot should be an enum");
        assert!(
            info.variant_by_name("Hash").is_some(),
            "Snapshot should have Hash variant"
        );
        assert!(
            info.variant_by_name("Resumed").is_some(),
            "Snapshot should have Resumed variant"
        );
    }

    // `Queryable` trait methods on `Table` must resolve to their dispatch symbols.
    #[test]
    fn test_core_bytecode_registers_queryable_trait_dispatch_symbols() {
        let core = compile_core_modules().expect("Core modules should compile");
        let filter = core.lookup_trait_method_symbol("Queryable", "Table", None, "filter");
        let map = core.lookup_trait_method_symbol("Queryable", "Table", None, "map");
        let execute = core.lookup_trait_method_symbol("Queryable", "Table", None, "execute");

        assert_eq!(filter, Some("Table::filter"));
        assert_eq!(map, Some("Table::map"));
        assert_eq!(execute, Some("Table::execute"));
    }

    // Compiling a directory with no Shape files must succeed.
    #[test]
    fn test_compile_empty_directory() {
        // Use a per-process directory so concurrent test runs cannot create or
        // delete each other's fixture, and clear any stale leftovers first.
        let temp_dir =
            std::env::temp_dir().join(format!("shape_test_empty_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&temp_dir);
        std::fs::create_dir_all(&temp_dir).expect("Temp directory should be creatable");

        let result = compile_directory(&temp_dir);

        // Clean up before asserting so a failing assertion does not leak the directory.
        let _ = std::fs::remove_dir_all(&temp_dir);

        assert!(result.is_ok());

        let program = result.unwrap();
        // Either no instructions at all, or the stream ends with Halt.
        assert!(
            program.instructions.is_empty()
                || program.instructions.last().map(|i| i.opcode)
                    == Some(crate::bytecode::OpCode::Halt)
        );
    }

    // A single function definition compiles and is listed by name.
    #[test]
    fn test_compile_source_simple_function() {
        let source = r#"
            fn double(x) { x * 2 }
        "#;
        let result = compile_source("test.shape", source);
        assert!(
            result.is_ok(),
            "compile_source should succeed: {:?}",
            result.err()
        );

        let program = result.unwrap();
        assert!(
            !program.functions.is_empty(),
            "Should have at least one function"
        );
        assert!(
            program.functions.iter().any(|f| f.name == "double"),
            "Should contain 'double' function"
        );
    }

    // Invalid syntax must surface as an error rather than a program.
    #[test]
    fn test_compile_source_parse_error() {
        let source = "fn broken(( { }";
        let result = compile_source("broken.shape", source);
        assert!(result.is_err(), "Should fail on invalid syntax");
    }

    // Enum definitions are accepted by `compile_source`.
    #[test]
    fn test_compile_source_enum_definition() {
        let source = r#"
            enum Direction {
                Up,
                Down,
                Left,
                Right
            }
        "#;
        let result = compile_source("enums.shape", source);
        assert!(
            result.is_ok(),
            "compile_source should handle enums: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_embedded_stdlib_round_trip() {
        // Compile from source, serialize, deserialize, and verify key properties match
        let source = compile_core_modules_from_source().expect("Source compilation should succeed");
        let bytes = rmp_serde::to_vec(&source).expect("Serialization should succeed");
        let deserialized = load_from_embedded(&bytes).expect("Deserialization should succeed");

        assert_eq!(
            source.functions.len(),
            deserialized.functions.len(),
            "Function count should match after round-trip"
        );
        assert_eq!(
            source.instructions.len(),
            deserialized.instructions.len(),
            "Instruction count should match after round-trip"
        );
        assert_eq!(
            source.constants.len(),
            deserialized.constants.len(),
            "Constant count should match after round-trip"
        );
        assert!(
            !deserialized.functions.is_empty(),
            "Deserialized should have functions"
        );
    }

    // Every function's [entry_point, entry_point + body_length) range must lie
    // inside the merged instruction stream.
    #[test]
    fn test_body_length_within_bounds() {
        let program = compile_core_modules_from_source().expect("compile");
        let total = program.instructions.len();
        let mut bad = Vec::new();
        for (i, f) in program.functions.iter().enumerate() {
            let end = f.entry_point + f.body_length;
            if end > total {
                bad.push(format!(
                    "func[{}] '{}' entry={} body_length={} end={} exceeds total={}",
                    i, f.name, f.entry_point, f.body_length, end, total
                ));
            }
        }
        assert!(
            bad.is_empty(),
            "Functions with OOB body_length:\n{}",
            bad.join("\n")
        );
    }

    #[test]
    fn test_core_binding_names() {
        let names = core_binding_names();
        assert!(!names.is_empty(), "Should have binding names from stdlib");
    }
}