Skip to main content

harn_vm/
stdlib.rs

//! Standard library builtins for the Harn VM.
//!
//! Every builtin is declared with the `#[harn_builtin]` proc-macro
//! (`crate::stdlib::macros::harn_builtin`). Each annotation emits a sibling
//! `static <FN>_DEF: VmBuiltinDef` carrying the signature, aliases, handler,
//! and metadata, and registers it into the workspace-global
//! [`macros::ALL_BUILTIN_DEFS`] distributed slice at link time. The CLI / LSP /
//! lint / serve / dap binaries call [`force_link`] to defeat rlib dead-code
//! stripping (linkme issue #36) so every static lands in the slice. Modules
//! still expose a `register_<module>_builtins(vm)` helper for ordered eager
//! registration (e.g. so `clock::timestamp` can override `process::timestamp`).
//! `register_vm_stdlib` calls those helpers in order and then installs the
//! aggregated signatures into the parser registry.
//!
//! See `CONTRIBUTING.md` ("Adding a stdlib builtin") for the full template.
16
17pub mod macros;
18
19mod agent_sessions;
20pub mod agent_state;
21pub(crate) mod agents;
22mod agents_daemon;
23mod artifact_emit;
24pub(crate) mod assemble;
25pub mod asset_paths;
26mod bytes;
27mod calendar;
28mod channel_guardrails;
29mod channels;
30pub(crate) mod clock;
31pub(crate) mod collections;
32mod command_policy;
33pub(crate) mod compaction;
34mod compression;
35mod concurrency;
36mod connectors;
37mod cookies;
38mod crypto;
39mod csv;
40mod datetime;
41mod durable_step;
42mod event_log;
43mod external_agent;
44pub(crate) mod files;
45mod flow;
46mod fs;
47mod git;
48mod grounding;
49pub(crate) mod harn_entry;
50pub(crate) mod hitl;
51mod hitl_read;
52pub mod host;
53pub mod http_response;
54pub(crate) mod io;
55mod iter;
56pub(crate) mod json;
57mod json_query;
58pub(crate) mod json_stream;
59mod jsonrpc;
60mod junit;
61mod lifecycle_receipts;
62mod logging;
63pub mod long_running;
64mod math;
65pub(crate) mod memory;
66mod monitors;
67mod multipart;
68mod net;
69mod net_policy;
70mod oauth_dynreg;
71mod oauth_storage;
72pub(crate) mod observability;
73pub(crate) mod options;
74mod path;
75pub(crate) mod path_scope_guard;
76pub(crate) mod pool;
77#[cfg(feature = "postgres")]
78mod postgres;
79#[cfg(feature = "postgres")]
80pub use postgres::install_shared_pool_registry;
81pub mod process;
82pub(crate) mod process_spawn;
83mod project;
84mod project_catalog;
85mod project_enrich;
86mod regex;
87mod review;
88mod runtime_scope;
89pub(crate) mod sandbox;
90pub mod secret_scan;
91mod sets;
92pub(crate) mod shapes;
93mod skills;
94#[cfg(feature = "sqlite")]
95mod sqlite;
96pub(crate) mod strings;
97pub(crate) mod supervisor;
98pub mod template;
99mod testbench;
100mod testing;
101mod timing;
102pub mod token_redaction;
103pub(crate) mod tool_hooks;
104pub(crate) mod tools;
105pub mod tracing;
106mod transcript_compact;
107pub(crate) mod transcript_project;
108mod triggers_stdlib;
109mod tui;
110mod types;
111mod url_parse;
112mod vision;
113pub(crate) mod waitpoint;
114mod waitpoints;
115mod web;
116pub mod workflow_messages;
117pub(crate) mod xml;
118
119use crate::http::register_http_builtins;
120use crate::llm::register_llm_builtins;
121use crate::mcp::register_mcp_builtins;
122use crate::mcp_server::register_mcp_server_builtins;
123use crate::vm::Vm;
124
125pub(crate) use crate::schema::{json_to_vm_value, schema_result_value};
126pub(crate) fn set_thread_source_dir(dir: &std::path::Path) {
127    process::set_thread_source_dir(dir);
128}
129
130/// Register core builtins: pure/deterministic, no I/O.
131pub fn register_core_stdlib(vm: &mut Vm) {
132    crate::runtime_context::register_runtime_context_builtins(vm);
133    types::register_type_builtins(vm);
134    math::register_math_builtins(vm);
135    strings::register_string_builtins(vm);
136    json::register_json_builtins(vm);
137    json_stream::register_json_stream_builtins(vm);
138    xml::register_xml_builtins(vm);
139    datetime::register_datetime_builtins(vm);
140    calendar::register_calendar_builtins(vm);
141    regex::register_regex_builtins(vm);
142    bytes::register_bytes_builtins(vm);
143    compression::register_compression_builtins(vm);
144    command_policy::register_command_policy_builtins(vm);
145    runtime_scope::register_runtime_scope_builtins(vm);
146    crypto::register_crypto_builtins(vm);
147    csv::register_csv_builtins(vm);
148    junit::register_junit_builtins(vm);
149    multipart::register_multipart_builtins(vm);
150    url_parse::register_url_builtins(vm);
151    web::register_web_builtins(vm);
152    cookies::register_cookie_builtins(vm);
153    path::register_path_helper_builtins(vm);
154    sets::register_set_builtins(vm);
155    collections::register_collection_builtins(vm);
156    iter::register_iter_builtins(vm);
157    event_log::register_event_log_builtins(vm);
158    durable_step::register_durable_step_builtins(vm);
159    channels::register_channel_builtins(vm);
160    channel_guardrails::register_channel_guardrail_builtins(vm);
161    shapes::register_shape_builtins(vm);
162    testing::register_testing_builtins(vm);
163    flow::register_flow_builtins(vm);
164    lifecycle_receipts::register_lifecycle_receipt_builtins(vm);
165    net_policy::register_net_policy_builtins(vm);
166    http_response::register_http_response_builtins(vm);
167}
168
169/// Register I/O builtins (requires OS access).
170pub fn register_io_stdlib(vm: &mut Vm) {
171    io::register_io_builtins(vm);
172    host::register_host_builtins(vm);
173    fs::register_fs_builtins(vm);
174    files::register_file_builtins(vm);
175    git::register_git_builtins(vm);
176    vision::register_vision_builtins(vm);
177    agent_state::register_agent_state_builtins(vm);
178    memory::register_memory_builtins(vm);
179    net::register_net_builtins(vm);
180    process::register_process_builtins(vm);
181    process::register_path_builtins(vm);
182    sandbox::register_sandbox_builtins(vm);
183    // Clock builtins overlay process::timestamp/elapsed so they honor
184    // mock_time / advance_time. Register AFTER process to take precedence.
185    clock::register_clock_builtins(vm);
186    crate::durable_rate_limit::register_durable_rate_limit_builtins(vm);
187    testbench::register_testbench_builtins(vm);
188    project::register_project_builtins(vm);
189    grounding::register_grounding_builtins(vm);
190    tracing::register_tracing_builtins(vm);
191    observability::register_observability_builtins(vm);
192    timing::register_timing_builtins(vm);
193    tui::register_tui_builtins(vm);
194}
195
196fn register_agent_stdlib_before_llm(vm: &mut Vm) {
197    concurrency::register_concurrency_builtins(vm);
198    connectors::register_connector_builtins(vm);
199    review::register_review_builtins(vm);
200    secret_scan::register_secret_scan_builtins(vm);
201    tools::register_tool_builtins(vm);
202    tool_hooks::register_tool_hooks_builtins(vm);
203    crate::composition::register_composition_builtins(vm);
204    skills::register_skill_builtins(vm);
205    agents_daemon::register_daemon_builtins(vm);
206    triggers_stdlib::register_trigger_builtins(vm);
207    #[cfg(feature = "postgres")]
208    postgres::register_postgres_builtins(vm);
209    #[cfg(feature = "sqlite")]
210    sqlite::register_sqlite_builtins(vm);
211    waitpoints::register_waitpoint_builtins(vm);
212    monitors::register_monitor_builtins(vm);
213    hitl::register_hitl_builtins(vm);
214    hitl_read::register_hitl_read_builtins(vm);
215    waitpoint::register_waitpoint_builtins(vm);
216    supervisor::register_supervisor_builtins(vm);
217    agents::register_agent_builtins(vm);
218    pool::register_pool_builtins(vm);
219    oauth_storage::register_oauth_storage_builtins(vm);
220    oauth_dynreg::register_oauth_dynreg_builtins(vm);
221    token_redaction::register_token_redaction_builtins(vm);
222    agent_sessions::register_agent_session_builtins(vm);
223    artifact_emit::register_artifact_emit_builtins(vm);
224    external_agent::register_external_agent_builtins(vm);
225    path_scope_guard::register_path_scope_guard_builtins(vm);
226    workflow_messages::register_workflow_message_builtins(vm);
227    transcript_compact::register_transcript_compaction_builtins(vm);
228    compaction::register_compaction_builtins(vm);
229    transcript_project::register_transcript_projection_builtins(vm);
230    assemble::register_assemble_context_builtin(vm);
231    crate::egress::register_egress_builtins(vm);
232    crate::security::register_security_builtins(vm);
233    register_http_builtins(vm);
234    jsonrpc::register_jsonrpc_builtins(vm);
235}
236
237fn register_agent_stdlib_after_llm(vm: &mut Vm) {
238    register_mcp_builtins(vm);
239    register_mcp_server_builtins(vm);
240    crate::step_runtime::register_step_builtins(vm);
241}
242
243/// Register agent builtins (requires network access and async runtime).
244pub fn register_agent_stdlib(vm: &mut Vm) {
245    register_agent_stdlib_before_llm(vm);
246    register_llm_builtins(vm);
247    register_agent_stdlib_after_llm(vm);
248}
249
250/// Register all standard builtins on a VM (core + io + agent). Also
251/// installs the macro-emitted signature slice into the parser registry
252/// (idempotent under repeat calls with the same slice pointer).
253pub fn register_vm_stdlib(vm: &mut Vm) {
254    register_core_stdlib(vm);
255    register_io_stdlib(vm);
256    register_agent_stdlib(vm);
257    if vm.global("harness").is_none() {
258        vm.set_harness(crate::harness::Harness::real());
259    }
260    harn_builtin_registry::install_builtin_signatures(all_builtin_signatures());
261}
262
263pub(crate) fn rebind_execution_state_builtins(vm: &mut Vm) {
264    concurrency::register_concurrency_builtins(vm);
265}
266
267fn stdlib_probe_vm() -> Vm {
268    let mut vm = Vm::new();
269    register_vm_stdlib(&mut vm);
270    // Name-only/metadata introspection never accesses this path, but passing
271    // a real per-platform temp dir keeps registration logic honest if a
272    // callee someday validates its parent.
273    let tmp = std::env::temp_dir();
274    crate::store::register_store_builtins(&mut vm, &tmp);
275    crate::checkpoint::register_checkpoint_builtins(&mut vm, &tmp, "default");
276    crate::metadata::register_metadata_builtins(&mut vm, &tmp);
277    // Install the macro-emitted signatures into the parser registry so any
278    // probe-driven name/metadata query (e.g. the alignment test) sees the
279    // post-migration sig set. Idempotent under repeat install with the same
280    // pointer (which `all_builtin_signatures()` guarantees).
281    harn_builtin_registry::install_builtin_signatures(all_builtin_signatures());
282    vm
283}
284
285/// Aggregate of every `#[harn_builtin]`-emitted `VmBuiltinDef` in the stdlib.
286///
287/// Backed by the `linkme::distributed_slice` declared on
288/// [`crate::stdlib::macros::ALL_BUILTIN_DEFS`] — every annotated fn
289/// contributes one entry automatically at link time. Keep builtin registration
290/// on this distributed slice instead of per-module arrays plus a central
291/// hand-maintained aggregator.
292///
293/// **Force-link warning** (linkme issue #36): rlib dead-code stripping
294/// can drop these statics when `harn-vm` is linked transitively. Every
295/// binary that exercises builtins (`harn-cli`, `harn-lsp`, `harn-lint`,
296/// `harn-serve`, `harn-dap`) calls [`force_link`] near `main()` to defeat
297/// the stripping. The alignment test
298/// `linkme_distributed_slice_populates_with_all_builtins` catches a silent
299/// regression by asserting the slice is non-empty.
300pub fn all_builtin_defs() -> &'static [&'static macros::VmBuiltinDef] {
301    &macros::ALL_BUILTIN_DEFS
302}
303
304/// Force-link entry point: a `pub fn` that touches `ALL_BUILTIN_DEFS` so
305/// the linker keeps every `#[harn_builtin]`-emitted static. Drivers
306/// (`harn-cli`, `harn-lsp`, etc.) call this once at startup. Doing nothing
307/// at runtime is fine — the side effect is purely a link-time signal.
308///
309/// See [`linkme issue #36`](https://github.com/dtolnay/linkme/issues/36)
310/// for why the explicit touch is necessary on every supported target.
311pub fn force_link() {
312    // `black_box` prevents LLVM from constant-folding the length read away.
313    // The `>= 1` guard never trips at runtime but is a load-bearing safety
314    // net: it converts a silent slice-empty regression into a panic that
315    // surfaces at the first builtin call instead of a confusing
316    // `HARN-NAM-002` somewhere down the line.
317    let len = std::hint::black_box(macros::ALL_BUILTIN_DEFS.len());
318    assert!(
319        len >= 1,
320        "linkme distributed_slice ALL_BUILTIN_DEFS is empty — \
321         the binary is missing `harn_vm::stdlib::force_link()` at startup, \
322         or the linker stripped the harn-vm rlib statics (see linkme issue #36)"
323    );
324}
325
326/// Driver-facing helper: flatten the macro-emitted `BuiltinDef`s into a
327/// `&'static [&'static BuiltinSignature]` slice suitable for
328/// [`harn_builtin_registry::install_builtin_signatures`].
329///
330/// Aliases are expanded into their own `BuiltinSignature` entries (the
331/// allocation is leaked once at startup — process-lifetime is appropriate
332/// for a global registry).
333pub fn all_builtin_signatures() -> &'static [&'static harn_builtin_meta::BuiltinSignature] {
334    use std::sync::OnceLock;
335    static AGG: OnceLock<Vec<&'static harn_builtin_meta::BuiltinSignature>> = OnceLock::new();
336    AGG.get_or_init(|| {
337        let mut out: Vec<&'static harn_builtin_meta::BuiltinSignature> = Vec::new();
338        for def in all_builtin_defs() {
339            if def.runtime_only {
340                continue;
341            }
342            out.push(&def.sig);
343            for alias in def.aliases {
344                let aliased = harn_builtin_meta::BuiltinSignature {
345                    name: alias,
346                    ..def.sig
347                };
348                out.push(Box::leak(Box::new(aliased)));
349            }
350        }
351        out
352    })
353    .as_slice()
354}
355
356/// Register every `#[harn_builtin]`-emitted def on the given VM. Drivers
357/// that build the full stdlib via `register_vm_stdlib` get this for free —
358/// each module's `register_*_builtins` walks its `MODULE_BUILTINS` slice.
359/// This helper is exposed for embedders / tests that want a one-call entry.
360pub fn register_all_macro_builtins(vm: &mut Vm) {
361    for def in all_builtin_defs() {
362        vm.register_builtin_def(def);
363    }
364}
365
366/// Return the canonical list of all stdlib builtin names. Used by
367/// harn-lint and harn-lsp to avoid hardcoded duplicate lists.
368pub fn stdlib_builtin_names() -> Vec<String> {
369    let vm = stdlib_probe_vm();
370    let mut names = vm.builtin_names();
371    // Special opcodes/keywords, not registered builtins, but linter
372    // should recognize them as valid function calls.
373    for extra in [
374        "spawn",
375        "await",
376        "cancel",
377        "cancel_graceful",
378        "__signal_interrupted",
379        "__signal_off_interrupt",
380        "__signal_on_interrupt",
381        "__signal_raise",
382        "is_cancelled",
383    ] {
384        names.push(extra.to_string());
385    }
386    names
387}
388
389/// Return discoverable metadata for registered stdlib builtins.
390pub fn stdlib_builtin_metadata() -> Vec<crate::vm::VmBuiltinMetadata> {
391    stdlib_probe_vm().builtin_metadata()
392}
393
394/// Reset thread-local stdlib state. Call between test runs.
395///
396/// Note: `long_running::reset_state()` is intentionally NOT called here
397/// because that store is process-global, not thread-local. Wiping it
398/// from a per-test reset hook lets one test cancel another test's
399/// in-flight worker thread (and lose its `agent_inbox::push`
400/// notification), which surfaces as `walk_dir_long_running` /
401/// `glob_long_running` timing out under parallel test load. The two
402/// call sites that genuinely need a clean handle store —
403/// `stdlib::fs::tests::{walk_dir_long_running,glob_long_running}` — call
404/// `long_running::reset_state()` explicitly while holding
405/// `LONG_RUNNING_TEST_LOCK`.
406pub fn reset_stdlib_state() {
407    logging::reset_logging_state();
408    process::reset_process_state();
409    clock::reset_clock_state();
410    io::reset_io_state();
411    sandbox::reset_sandbox_state();
412    fs::reset_fs_state();
413    json::reset_json_state();
414    json_stream::reset_json_stream_state();
415    host::reset_host_state();
416    observability::reset_observability_state();
417    timing::reset_timing_state();
418    durable_step::reset_durable_step_state();
419    crate::egress::reset_egress_policy_for_host();
420    hitl::reset_hitl_state();
421    crate::http::reset_http_state();
422    crate::external_agent::reset_external_agent_state();
423    jsonrpc::reset_jsonrpc_state();
424    monitors::reset_monitor_state();
425    waitpoints::reset_waitpoint_state();
426    waitpoint::reset_waitpoint_state();
427    triggers_stdlib::reset_auto_resume_timeouts();
428    compaction::reset_compaction_state();
429    agents::reset_agent_worker_state();
430    agents::workflow::reset_workflow_run_states();
431    pool::reset_pool_state();
432    #[cfg(feature = "postgres")]
433    postgres::reset_postgres_state();
434    #[cfg(feature = "sqlite")]
435    sqlite::reset_sqlite_state();
436    supervisor::reset_supervisor_state();
437    agents::records::reset_eval_metrics();
438    agents::records::reset_friction_events();
439    tools::clear_current_tool_registry();
440    tools::clear_tool_synthesis_cache();
441    vision::reset_vision_state();
442    crate::skills::clear_current_skill_registry();
443    template::reset_prompt_registry();
444    crate::triggers::clear_webhook_intake_state();
445    crate::llm::cache::reset_in_process_cache_state();
446}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// After `register_vm_stdlib`, the VM exposes a `harness` global, and a
    /// Harn function that reads `harness.clock.now_ms()` through that global
    /// evaluates to `true`.
    #[tokio::test(flavor = "current_thread")]
    async fn register_vm_stdlib_installs_default_harness_handle() {
        let program = crate::compile_source(
            r"
fn __probe_global_harness_clock() {
  let now = harness.clock.now_ms()
  return now >= 0
}

fn main(harness: Harness) {
  return __probe_global_harness_clock()
}
",
        )
        .expect("compile harness clock probe");

        let mut vm = Vm::new();
        register_vm_stdlib(&mut vm);
        assert!(vm.global("harness").is_some());

        let outcome = vm
            .execute(&program)
            .await
            .expect("execute harness clock probe");
        assert!(matches!(outcome, crate::value::VmValue::Bool(true)));
    }
}