harn-vm 0.8.151

Async bytecode virtual machine for the Harn programming language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
//! Line coverage for executed Harn programs.
//!
//! Harn already stores a source line for every emitted instruction
//! (`Chunk::lines`), so line coverage needs no separate debug-info pass: the
//! denominator is the set of distinct non-zero lines a chunk (and its nested
//! function bodies) emit, and the numerator is the subset whose instructions
//! actually ran.
//!
//! ## How it is wired
//!
//! Coverage is opt-in and process-global so it captures every VM isolate a run
//! spins up (imports, parallel branches, spawned agents) without threading a
//! flag through every constructor:
//!
//! * [`begin_session`] flips [`is_enabled`] on and clears the merged report.
//! * Each [`crate::vm::Vm`] checks [`is_enabled`] at construction; when on it
//!   carries its own [`Coverage`] accumulator and records a hit per executed
//!   instruction in the dispatch loop.
//! * On drop a VM folds its accumulator into the global report.
//! * [`end_session`] flips coverage off and returns the merged [`Coverage`].
//!
//! ## File attribution
//!
//! A chunk compiled from an imported module carries its own `source_file`; the
//! entry file's top-level chunk and its same-file function bodies carry `None`.
//! We attribute a `None` chunk to the VM's primary file (the script under
//! execution), and otherwise to the chunk's `source_file`. Nested function
//! chunks inherit their parent's effective file when they carry no
//! `source_file` of their own, so a module's uncalled helpers are still counted
//! against the module — not misattributed to the entry script.
//!
//! Rendering only includes files that exist on disk, which drops the synthetic
//! paths that the embedded stdlib and in-memory `eval` chunks report.

use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, OnceLock};

use crate::chunk::Chunk;

// Process-global switch: starts `false`, set to `true` by `begin_session` and
// back to `false` by `end_session`.
static COVERAGE_ON: AtomicBool = AtomicBool::new(false);
// Merged whole-run report; lazily initialised on first access through
// `global()`.
static GLOBAL_REPORT: OnceLock<Mutex<Coverage>> = OnceLock::new();

/// Access the process-wide merged report, creating an empty one on first use.
fn global() -> &'static Mutex<Coverage> {
    let make_empty = || Mutex::new(Coverage::new());
    GLOBAL_REPORT.get_or_init(make_empty)
}

/// True while a coverage session is active, i.e. between [`begin_session`]
/// and [`end_session`].
///
/// Implemented as a single relaxed atomic load of the process-global flag, so
/// it is effectively free and branch-predicted "off" when no session is
/// running.
///
/// NOTE(review): this was previously documented as being read once per VM
/// construction *and* once per executed instruction. Within this module it is
/// only read by [`for_primary`]; confirm against the VM dispatch loop whether
/// the per-instruction read still exists.
#[inline]
pub fn is_enabled() -> bool {
    COVERAGE_ON.load(Ordering::Relaxed)
}

/// Open a coverage session.
///
/// The merged report is reset to empty first, and only then is recording
/// switched on, so every VM constructed until [`end_session`] contributes to a
/// clean report.
///
/// # Panics
///
/// Panics if the global report mutex is poisoned.
pub fn begin_session() {
    // The guard is a temporary: the lock is released at the end of this
    // statement, before the flag flips.
    *global().lock().unwrap() = Coverage::new();
    COVERAGE_ON.store(true, Ordering::SeqCst);
}

/// Close the coverage session and hand back everything merged so far.
///
/// Recording is switched off first; the global report is then moved out and
/// left empty.
///
/// # Panics
///
/// Panics if the global report mutex is poisoned.
pub fn end_session() -> Coverage {
    COVERAGE_ON.store(false, Ordering::SeqCst);
    let mut guard = global().lock().unwrap();
    let merged = std::mem::take(&mut *guard);
    drop(guard);
    merged
}

/// Create the accumulator a VM should carry, if a session is active.
///
/// Yields `None` when coverage is off, so the dispatch-loop hook is a single
/// `Option::is_some` branch on the hot path. When coverage is on, the returned
/// accumulator is seeded with `primary_file` (if given), which is the file that
/// same-file (`source_file: None`) chunks are attributed to.
pub(crate) fn for_primary(primary_file: Option<&str>) -> Option<Coverage> {
    is_enabled().then(|| {
        let mut accumulator = Coverage::new();
        if let Some(path) = primary_file {
            accumulator.set_primary_file(path);
        }
        accumulator
    })
}

/// Fold one VM's accumulator into the global report. Called from `Vm::drop`.
///
/// An accumulator with no file entries is discarded without taking the lock.
///
/// # Panics
///
/// Panics if the global report mutex is poisoned.
pub(crate) fn merge_into_global(data: Coverage) {
    if !data.files.is_empty() {
        global().lock().unwrap().merge(data);
    }
}

/// Hit/total line sets for a single source file.
///
/// Both sets are ordered (`BTreeSet`), so renderers iterate lines in ascending
/// order.
#[derive(Debug, Clone, Default)]
struct FileLines {
    /// Every instrumentable (non-zero) line emitted for this file; the
    /// coverage denominator.
    total: BTreeSet<u32>,
    /// The subset that executed; the coverage numerator.
    hit: BTreeSet<u32>,
}

/// Accumulated line coverage. Used both as a per-VM accumulator and, after
/// merging, as the whole-run report.
///
/// Only `files` is carried across a merge; `primary_file`, `seen` and
/// `file_of` are bookkeeping local to one accumulator.
#[derive(Debug, Clone, Default)]
pub struct Coverage {
    /// The script under execution; receives lines from chunks that carry no
    /// `source_file` of their own.
    primary_file: Option<Arc<str>>,
    /// Per-file line sets, keyed by file path and kept in sorted order so
    /// rendered output is deterministic.
    files: BTreeMap<Arc<str>, FileLines>,
    /// Chunk ids whose denominator tree has already been walked (per VM).
    seen: HashSet<u64>,
    /// Resolved effective file per chunk id, so a hit needs no re-walk.
    file_of: HashMap<u64, Arc<str>>,
}

impl Coverage {
    /// Create an empty accumulator (same as `Coverage::default()`).
    pub(crate) fn new() -> Self {
        Self::default()
    }

    /// Record the VM's primary file (the script passed to `execute`). Only the
    /// first call wins so a nested sub-execution can't clobber it.
    pub(crate) fn set_primary_file(&mut self, file: &str) {
        if self.primary_file.is_none() {
            self.primary_file = Some(Arc::from(file));
        }
    }

    /// Record execution of the instruction at `ip` in `chunk`.
    ///
    /// The first time a chunk id is seen, the chunk and its nested function
    /// bodies are registered so their lines enter the denominator. After that
    /// the cached file for the chunk id is reused. An `ip` outside
    /// `chunk.lines`, or one that maps to line `0`, records no hit.
    pub(crate) fn record(&mut self, chunk: &Chunk, ip: usize) {
        let id = chunk.cache_id();
        let file = match self.file_of.get(&id) {
            Some(file) => file.clone(),
            None => {
                let effective = self.effective_file(chunk.source_file.as_deref());
                self.register_tree(chunk, &effective);
                // `register_tree` normally fills `file_of`; fall back to the
                // freshly resolved file if it did not.
                self.file_of.get(&id).cloned().unwrap_or(effective)
            }
        };
        if let Some(&line) = chunk.lines.get(ip) {
            if line != 0 {
                self.files.entry(file).or_default().hit.insert(line);
            }
        }
    }

    /// Resolve the file a chunk is attributed to: its own `source_file` when
    /// present, otherwise the primary file, otherwise the placeholder
    /// `"<unknown>"`.
    fn effective_file(&self, source_file: Option<&str>) -> Arc<str> {
        match source_file {
            Some(path) => Arc::from(path),
            None => self
                .primary_file
                .clone()
                .unwrap_or_else(|| Arc::from("<unknown>")),
        }
    }

    /// Walk `chunk` and its nested function bodies once, adding every
    /// instrumentable line to the denominator. Idempotent per chunk id.
    ///
    /// A nested function chunk without its own `source_file` inherits
    /// `effective` from its parent.
    fn register_tree(&mut self, chunk: &Chunk, effective: &Arc<str>) {
        let id = chunk.cache_id();
        if !self.seen.insert(id) {
            return;
        }
        self.file_of.insert(id, effective.clone());
        {
            // The entry is created even when the chunk has no non-zero lines.
            let entry = self.files.entry(effective.clone()).or_default();
            for &line in &chunk.lines {
                if line != 0 {
                    entry.total.insert(line);
                }
            }
        }
        for func in &chunk.functions {
            let child = match func.chunk.source_file.as_deref() {
                Some(path) => Arc::from(path),
                None => effective.clone(),
            };
            self.register_tree(func.chunk.as_ref(), &child);
        }
    }

    /// Union `other`'s per-file line sets into `self`. Only `files` is merged;
    /// `other`'s primary file and chunk bookkeeping are dropped.
    fn merge(&mut self, other: Coverage) {
        for (file, lines) in other.files {
            let entry = self.files.entry(file).or_default();
            entry.total.extend(lines.total);
            entry.hit.extend(lines.hit);
        }
    }

    /// Files that exist on disk, in deterministic order. Drops the synthetic
    /// paths embedded-stdlib and in-memory `eval` chunks report.
    ///
    /// Every call checks the filesystem once per recorded file.
    fn real_files(&self) -> Vec<(&str, &FileLines)> {
        self.files
            .iter()
            .filter(|(file, _)| Path::new(file.as_ref()).exists())
            .map(|(file, lines)| (file.as_ref(), lines))
            .collect()
    }

    /// `(covered, total)` line counts across all on-disk files.
    pub fn totals(&self) -> (usize, usize) {
        self.real_files()
            .into_iter()
            .fold((0, 0), |(cov, total), (_, lines)| {
                (cov + lines.hit.len(), total + lines.total.len())
            })
    }

    /// Whole-run line coverage percentage (0.0 when there is nothing to cover).
    pub fn percent(&self) -> f64 {
        let (covered, total) = self.totals();
        // Share the table's percentage helper so the two can never drift.
        pct(covered, total)
    }

    /// True when the report contains no file that exists on disk.
    ///
    /// A file that is present but contributed no instrumentable lines still
    /// counts as present.
    pub fn is_empty(&self) -> bool {
        self.real_files().is_empty()
    }

    /// A human-readable per-file table plus a total line.
    ///
    /// Returns a one-line notice instead when no recorded file exists on disk.
    /// File names are shown relative to the current directory where possible,
    /// and the name column is between 4 and 60 characters wide.
    pub fn render_text(&self) -> String {
        // Take one snapshot of the on-disk files and their display names. The
        // TOTAL row is summed from this same snapshot, so it always agrees
        // with the rows above it, and the filesystem and working directory are
        // consulted once per file.
        let rows: Vec<(String, &FileLines)> = self
            .real_files()
            .into_iter()
            .map(|(file, lines)| (display_path(file), lines))
            .collect();
        if rows.is_empty() {
            return "No coverage data (no executed source files found on disk).".to_string();
        }
        let name_width = rows
            .iter()
            .map(|(name, _)| name.chars().count())
            .max()
            .unwrap_or(4)
            .clamp(4, 60);
        let mut out = String::new();
        out.push_str(&format!(
            "{:<name_width$}  {:>6}  {:>7}  {:>6}\n",
            "File", "Lines", "Covered", "%"
        ));
        let mut covered_sum = 0usize;
        let mut total_sum = 0usize;
        for (name, lines) in &rows {
            let total = lines.total.len();
            let covered = lines.hit.len();
            covered_sum += covered;
            total_sum += total;
            out.push_str(&format!(
                "{:<name_width$}  {:>6}  {:>7}  {:>5.1}\n",
                truncate(name, name_width),
                total,
                covered,
                pct(covered, total),
            ));
        }
        out.push_str(&format!(
            "{:<name_width$}  {:>6}  {:>7}  {:>5.1}\n",
            "TOTAL",
            total_sum,
            covered_sum,
            pct(covered_sum, total_sum),
        ));
        out
    }

    /// LCOV `tracefile` output for Codecov / VS Code Coverage Gutters / genhtml.
    ///
    /// Emits one record per on-disk file. Each instrumentable line gets a `DA`
    /// entry whose count is `1` if it was hit and `0` otherwise, because only
    /// hit/not-hit is tracked. An empty report renders as an empty string.
    pub fn render_lcov(&self) -> String {
        let mut out = String::new();
        for (file, lines) in self.real_files() {
            out.push_str("TN:\n");
            out.push_str(&format!("SF:{file}\n"));
            for &line in &lines.total {
                let count = u8::from(lines.hit.contains(&line));
                out.push_str(&format!("DA:{line},{count}\n"));
            }
            out.push_str(&format!("LF:{}\n", lines.total.len()));
            out.push_str(&format!("LH:{}\n", lines.hit.len()));
            out.push_str("end_of_record\n");
        }
        out
    }
}

/// Percentage of `covered` out of `total`, defined as `0.0` when `total` is
/// zero so an empty file never divides by zero.
fn pct(covered: usize, total: usize) -> f64 {
    match total {
        0 => 0.0,
        _ => covered as f64 / total as f64 * 100.0,
    }
}

/// Shorten `file` to a path relative to the current directory for compact
/// tables. Falls back to `file` unchanged when the current directory cannot be
/// read or is not a prefix of `file`.
fn display_path(file: &str) -> String {
    std::env::current_dir()
        .ok()
        .and_then(|cwd| {
            Path::new(file)
                .strip_prefix(&cwd)
                .ok()
                .map(|relative| relative.to_string_lossy().into_owned())
        })
        .unwrap_or_else(|| file.to_string())
}

/// Fit `text` into at most `width` characters.
///
/// Text that already fits is returned unchanged. Otherwise the result is a
/// leading `…` followed by the last `width - 1` characters, so a shortened
/// name is visibly marked and still fills the column exactly. A `width` of
/// zero yields an empty string.
///
/// Lengths are counted in `char`s, not bytes, so multi-byte text is never
/// split inside a character.
fn truncate(text: &str, width: usize) -> String {
    let count = text.chars().count();
    if count <= width {
        return text.to_string();
    }
    if width == 0 {
        // No room even for the marker.
        return String::new();
    }
    // Keep the tail (the file name) since the leading dirs are the common
    // prefix that carries the least signal. One column is reserved for the
    // ellipsis marker.
    let keep = width - 1;
    let mut out = String::with_capacity(text.len());
    out.push('\u{2026}');
    out.extend(text.chars().skip(count - keep));
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::chunk::{Chunk, Op};

    /// Build a chunk holding one `Op::Nil` per entry of `lines`, each tagged
    /// with that source line.
    fn chunk_with_lines(lines: &[u32]) -> Chunk {
        let mut chunk = Chunk::new();
        for line in lines.iter().copied() {
            chunk.emit(Op::Nil, line);
        }
        chunk
    }

    /// The line sets of the first (in these tests: only) recorded file.
    fn first_file(cov: &Coverage) -> &FileLines {
        cov.files.values().next().unwrap()
    }

    #[test]
    fn denominator_counts_distinct_nonzero_lines() {
        let file = "/does/not/matter.harn";
        let chunk = chunk_with_lines(&[1, 1, 2, 0, 3]);
        let mut cov = Coverage::new();
        cov.set_primary_file(file);
        // Only the denominator is registered; nothing is executed.
        cov.register_tree(&chunk, &Arc::from(file));
        let recorded = first_file(&cov);
        // The repeated 1 collapses and the synthetic 0 is skipped.
        assert_eq!(recorded.total, BTreeSet::from([1, 2, 3]));
        assert!(recorded.hit.is_empty());
    }

    #[test]
    fn hits_are_a_subset_of_the_denominator() {
        let chunk = chunk_with_lines(&[10, 11, 12]);
        let mut cov = Coverage::new();
        cov.set_primary_file("/x.harn");
        // Instruction indices 0 and 2 map to lines 10 and 12.
        for ip in [0, 2] {
            cov.record(&chunk, ip);
        }
        let recorded = first_file(&cov);
        assert_eq!(recorded.total.len(), 3);
        assert_eq!(recorded.hit, BTreeSet::from([10, 12]));
    }

    #[test]
    fn line_zero_is_not_instrumentable() {
        let chunk = chunk_with_lines(&[0, 5]);
        let mut cov = Coverage::new();
        cov.set_primary_file("/x.harn");
        // ip 0 is the synthetic line 0 (ignored); ip 1 is line 5 (counted).
        cov.record(&chunk, 0);
        cov.record(&chunk, 1);
        let recorded = first_file(&cov);
        assert_eq!(recorded.total, BTreeSet::from([5]));
        assert_eq!(recorded.hit, BTreeSet::from([5]));
    }

    #[test]
    fn merge_unions_totals_and_hits() {
        let mut a = Coverage::new();
        a.files.insert(
            Arc::from("/f.harn"),
            FileLines {
                total: BTreeSet::from([1, 2, 3]),
                hit: BTreeSet::from([1]),
            },
        );
        let mut b = Coverage::new();
        b.files.insert(
            Arc::from("/f.harn"),
            FileLines {
                total: BTreeSet::from([3, 4]),
                hit: BTreeSet::from([4]),
            },
        );
        a.merge(b);
        let merged = a.files.get("/f.harn").unwrap();
        assert_eq!(merged.total, BTreeSet::from([1, 2, 3, 4]));
        assert_eq!(merged.hit, BTreeSet::from([1, 4]));
    }

    #[test]
    fn empty_report_renders_a_valid_empty_lcov() {
        // With no on-disk records the tracefile is the empty string, which is
        // still valid LCOV. `--coverage-out` writes it rather than skipping,
        // because a missing artifact would break a CI consumer.
        let cov = Coverage::new();
        assert!(cov.is_empty());
        assert_eq!(cov.render_lcov(), "");
    }

    #[test]
    fn lcov_shapes_da_lines() {
        // The render filter only keeps paths that exist on disk; the test
        // binary itself is a convenient one.
        let exe = std::env::current_exe().unwrap();
        let path_str = exe.to_string_lossy().into_owned();
        let mut cov = Coverage::new();
        cov.files.insert(
            Arc::from(path_str.as_str()),
            FileLines {
                total: BTreeSet::from([1, 2]),
                hit: BTreeSet::from([1]),
            },
        );
        let lcov = cov.render_lcov();
        let source_line = format!("SF:{path_str}");
        let expected = [
            source_line.as_str(),
            "DA:1,1",
            "DA:2,0",
            "LF:2",
            "LH:1",
            "end_of_record",
        ];
        for needle in expected {
            assert!(lcov.contains(needle));
        }
    }
}