Skip to main content

bock_codegen/
generator.rs

1//! Code generator trait and output types.
2
3use std::path::PathBuf;
4
5use bock_air::NodeKind;
6use bock_types::AIRModule;
7
8use crate::error::CodegenError;
9use crate::profile::TargetProfile;
10
11// ─── GeneratedCode ───────────────────────────────────────────────────────────
12
13/// Output from code generation — consistent across all targets.
14#[derive(Debug, Clone)]
15pub struct GeneratedCode {
16    /// Generated output files (path + content pairs).
17    pub files: Vec<OutputFile>,
18    /// Source map from AIR spans to target spans (optional for v1).
19    pub source_map: Option<SourceMap>,
20}
21
/// One file produced by a code generator.
#[derive(Clone, Debug)]
pub struct OutputFile {
    /// Path of the output file, expressed relative to the output location.
    pub path: PathBuf,
    /// The generated source text stored in the file.
    pub content: String,
}
30
31/// Maps AIR source spans to generated code spans.
32///
33/// Populated by JS/TS code generators with pointwise mappings from generated
34/// `(line, col)` back to source `(line, col)`. For other targets, only the
35/// legacy `entries` list (AIR node id → target byte range) is populated.
36#[derive(Debug, Clone, Default)]
37pub struct SourceMap {
38    /// Legacy entries keyed by AIR node id (present for all targets).
39    pub entries: Vec<SourceMapEntry>,
40    /// Pointwise position mappings from generated code to source.
41    pub mappings: Vec<SourceMapping>,
42    /// The generated output file these mappings refer to (e.g. `"output.js"`).
43    pub generated_file: String,
44    /// Source files referenced by `mappings`, in file-id order.
45    /// Each entry is `(path, optional_inline_content)`.
46    pub sources: Vec<SourceInfo>,
47}
48
/// One legacy source-map record tying an AIR node to a byte range of
/// generated output.
#[derive(Clone, Debug)]
pub struct SourceMapEntry {
    /// Id of the AIR node this entry describes.
    pub air_node_id: u32,
    /// Position of the target file within `GeneratedCode::files`.
    pub file_index: usize,
    /// Byte offset at which the range starts in the generated file.
    pub target_start: usize,
    /// Length of the range in the generated file, in bytes.
    pub target_len: usize,
}
61
/// One pointwise link between a position in the generated code and the
/// position in the Bock source it originated from.
#[derive(Clone, Debug)]
pub struct SourceMapping {
    /// Line in the generated file, counted from 1.
    pub gen_line: u32,
    /// Column in the generated file, counted from 1 in characters.
    pub gen_col: u32,
    /// Source line, counted from 1. A value of `0` marks the mapping as
    /// unresolved; [`SourceMap::resolve_positions`] fills it in when given
    /// the source content.
    pub src_line: u32,
    /// Source column, counted from 1. Holds `0` while unresolved.
    pub src_col: u32,
    /// Byte offset within the source file, from which line/column are
    /// (re)computed.
    pub src_offset: u32,
    /// File-registry id of the source file; indexes `SourceMap::sources`.
    pub src_file_id: u32,
}
80
/// Describes one source file that a [`SourceMap`] refers to.
#[derive(Clone, Debug)]
pub struct SourceInfo {
    /// Path of the file (relative or absolute), written exactly as it should
    /// appear in the emitted source-map JSON.
    pub path: String,
    /// Inline file content, if available. When set, it is embedded into the
    /// `.map` file through `sourcesContent`.
    pub content: Option<String>,
}
91
92impl SourceMap {
93    /// Fills in `src_line` and `src_col` on every mapping by looking up
94    /// `src_offset` inside `sources_content`, which is indexed by
95    /// `src_file_id`. Mappings whose `src_file_id` is out of range are left
96    /// unresolved.
97    pub fn resolve_positions(&mut self, sources_content: &[&str]) {
98        for m in &mut self.mappings {
99            let Some(src) = sources_content.get(m.src_file_id as usize) else {
100                continue;
101            };
102            let (line, col) = byte_to_line_col(src, m.src_offset as usize);
103            m.src_line = line;
104            m.src_col = col;
105        }
106    }
107
108    /// Emits a Source Map v3 JSON document referring to this map's
109    /// `generated_file` and `sources`. Only mappings whose `src_line` is
110    /// non-zero are included.
111    #[must_use]
112    pub fn to_source_map_v3_json(&self) -> String {
113        let mut out = String::new();
114        out.push_str("{\"version\":3,\"file\":\"");
115        out.push_str(&escape_json(&self.generated_file));
116        out.push_str("\",\"sourceRoot\":\"\",\"sources\":[");
117        for (i, s) in self.sources.iter().enumerate() {
118            if i > 0 {
119                out.push(',');
120            }
121            out.push('"');
122            out.push_str(&escape_json(&s.path));
123            out.push('"');
124        }
125        out.push_str("],\"sourcesContent\":[");
126        for (i, s) in self.sources.iter().enumerate() {
127            if i > 0 {
128                out.push(',');
129            }
130            match &s.content {
131                Some(c) => {
132                    out.push('"');
133                    out.push_str(&escape_json(c));
134                    out.push('"');
135                }
136                None => out.push_str("null"),
137            }
138        }
139        out.push_str("],\"names\":[],\"mappings\":\"");
140        out.push_str(&encode_vlq_mappings(&self.mappings));
141        out.push_str("\"}");
142        out
143    }
144}
145
/// Convert a UTF-8 byte offset into a 1-indexed (line, column) pair. Column
/// counts Unicode scalar values, not bytes — matching `bock-source`.
///
/// The offset is sanitized before use so this function never panics:
/// * offsets past the end of `src` are clamped to `src.len()`;
/// * offsets that land inside a multi-byte UTF-8 sequence are moved back to
///   the start of that character.
///
/// Line and column values saturate at `u32::MAX` instead of wrapping.
fn byte_to_line_col(src: &str, offset: usize) -> (u32, u32) {
    let mut offset = offset.min(src.len());
    // Slicing a `str` at a non-boundary index panics, so floor the offset to
    // the nearest char boundary. Index 0 is always a boundary, which
    // guarantees termination.
    while !src.is_char_boundary(offset) {
        offset -= 1;
    }
    let before = &src[..offset];
    let newlines = before.bytes().filter(|b| *b == b'\n').count();
    let line_start = before.rfind('\n').map_or(0, |i| i + 1);
    let chars_on_line = before[line_start..].chars().count();
    let line = u32::try_from(newlines).unwrap_or(u32::MAX).saturating_add(1);
    let col = u32::try_from(chars_on_line)
        .unwrap_or(u32::MAX)
        .saturating_add(1);
    (line, col)
}
156
/// Escapes `s` for embedding inside a JSON string literal.
///
/// Handles the double quote, the backslash, the short escapes for newline,
/// carriage return, tab, backspace and form feed, and writes any other
/// character below U+0020 as a `\uXXXX` sequence. Everything else is copied
/// through unchanged.
fn escape_json(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Characters that have a dedicated two-character escape.
        let short_form = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '\u{08}' => Some("\\b"),
            '\u{0C}' => Some("\\f"),
            _ => None,
        };
        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters need the generic \u form.
            escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
178
179/// Encode mappings as a Source Map v3 "mappings" string (semicolons between
180/// generated lines, commas between segments, VLQ-encoded deltas).
181fn encode_vlq_mappings(mappings: &[SourceMapping]) -> String {
182    let mut resolved: Vec<&SourceMapping> =
183        mappings.iter().filter(|m| m.src_line > 0).collect();
184    resolved.sort_by_key(|m| (m.gen_line, m.gen_col));
185
186    let mut out = String::new();
187    let mut prev_gen_line: u32 = 1;
188    let mut prev_gen_col: i64 = 0;
189    let mut prev_src_file: i64 = 0;
190    let mut prev_src_line: i64 = 0;
191    let mut prev_src_col: i64 = 0;
192
193    let mut first_on_line = true;
194    for m in resolved {
195        while prev_gen_line < m.gen_line {
196            out.push(';');
197            prev_gen_line += 1;
198            prev_gen_col = 0;
199            first_on_line = true;
200        }
201        if !first_on_line {
202            out.push(',');
203        }
204        let gen_col = (m.gen_col as i64) - 1;
205        let src_file = m.src_file_id as i64;
206        let src_line = (m.src_line as i64) - 1;
207        let src_col = (m.src_col as i64) - 1;
208
209        vlq_encode(&mut out, gen_col - prev_gen_col);
210        vlq_encode(&mut out, src_file - prev_src_file);
211        vlq_encode(&mut out, src_line - prev_src_line);
212        vlq_encode(&mut out, src_col - prev_src_col);
213
214        prev_gen_col = gen_col;
215        prev_src_file = src_file;
216        prev_src_line = src_line;
217        prev_src_col = src_col;
218        first_on_line = false;
219    }
220    out
221}
222
/// Base-64 VLQ encode a single signed integer onto `out`.
///
/// The sign is stored in the least-significant bit and the magnitude in the
/// bits above it. The value is then emitted in 5-bit groups, least
/// significant first, with bit `0x20` set on every group except the last.
///
/// The magnitude is taken with `unsigned_abs` and widened to `u128`, so every
/// `i64` (including `i64::MIN`, whose negation does not fit in `i64`) is
/// encoded without overflow.
fn vlq_encode(out: &mut String, value: i64) {
    const BASE64: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let sign_bit = u128::from(value < 0);
    let mut remaining: u128 = (u128::from(value.unsigned_abs()) << 1) | sign_bit;
    loop {
        let mut digit = (remaining & 0x1F) as u8;
        remaining >>= 5;
        if remaining != 0 {
            // Continuation bit: more groups follow.
            digit |= 0x20;
        }
        out.push(char::from(BASE64[usize::from(digit)]));
        if remaining == 0 {
            break;
        }
    }
}
244
245// ─── CodeGenerator trait ─────────────────────────────────────────────────────
246
247/// The trait all per-target code generators implement.
248///
249/// Each target (JS, TS, Python, Rust, Go) provides a struct that implements
250/// this trait. The `generate_module` method transforms a fully-lowered AIR
251/// module into target-specific source code.
252pub trait CodeGenerator {
253    /// Returns the target profile for this generator.
254    fn target(&self) -> &TargetProfile;
255
256    /// Returns `true` when the given AIR node should go through Tier 1
257    /// AI synthesis (§17.2, Q3 amended).
258    ///
259    /// The default implementation consults [`TargetProfile::ai_hints`]
260    /// via [`crate::ai_synthesis::needs_ai_synthesis`]. Backends that
261    /// want per-node overrides (e.g., only non-trivial `match`
262    /// expressions) can override this method.
263    fn needs_ai_synthesis(&self, node: &bock_air::AIRNode) -> bool {
264        crate::ai_synthesis::needs_ai_synthesis(self.target(), node)
265    }
266
267    /// Generates target code from a fully-lowered AIR module.
268    ///
269    /// # Errors
270    ///
271    /// Returns `CodegenError` if the module contains constructs that cannot
272    /// be represented in the target language.
273    fn generate_module(&self, module: &AIRModule) -> Result<GeneratedCode, CodegenError>;
274
275    /// Returns the source-code snippet that invokes the user's `main` function
276    /// as the entry point for this target, or `None` if the target has a
277    /// native entry-point convention (Rust `fn main`, Go `func main`) that
278    /// runs without a synthetic call.
279    ///
280    /// `main_is_async` is `true` when the user's `main` function is declared
281    /// `async fn`; targets with native async runtimes (JS, TS, Python) wrap
282    /// the call in an event-loop driver in that case.
283    ///
284    /// Targets that need a trailing invocation (JS, TS, Python) override this
285    /// to return e.g. `"main();\n"`. The default `generate_project` appends
286    /// the snippet when any module declares a top-level `main` function.
287    fn entry_invocation(&self, main_is_async: bool) -> Option<String> {
288        let _ = main_is_async;
289        None
290    }
291
292    /// Generates target code from multiple AIR modules, producing a single concatenated output.
293    ///
294    /// The default implementation generates each module separately and joins the
295    /// results into one file. Generators that need to deduplicate preambles
296    /// (e.g., Go's `package` declaration) should override this method.
297    fn generate_project(&self, modules: &[&AIRModule]) -> Result<GeneratedCode, CodegenError> {
298        let mut combined = String::new();
299        let mut merged_map: Option<SourceMap> = None;
300        for module in modules {
301            let code = self.generate_module(module)?;
302            // For each file, record where its content starts in `combined`
303            // (as a 0-indexed line count). Generators typically emit one file.
304            let mut file_shifts: Vec<u32> = Vec::with_capacity(code.files.len());
305            for file in &code.files {
306                if !combined.is_empty() && !file.content.is_empty() {
307                    combined.push('\n');
308                }
309                file_shifts.push(count_newlines(&combined) as u32);
310                combined.push_str(&file.content);
311            }
312            if let Some(mut sm) = code.source_map {
313                let shift = file_shifts.first().copied().unwrap_or(0);
314                for m in &mut sm.mappings {
315                    m.gen_line = m.gen_line.saturating_add(shift);
316                }
317                match &mut merged_map {
318                    Some(acc) => {
319                        acc.mappings.append(&mut sm.mappings);
320                        for src in sm.sources {
321                            if !acc.sources.iter().any(|s| s.path == src.path) {
322                                acc.sources.push(src);
323                            }
324                        }
325                    }
326                    None => merged_map = Some(sm),
327                }
328            }
329        }
330
331        let main_is_async = modules.iter().any(|m| module_main_fn_is_async(m));
332        if let Some(invocation) = self.entry_invocation(main_is_async) {
333            if modules.iter().any(|m| module_declares_main_fn(m)) {
334                if !combined.is_empty() && !combined.ends_with('\n') {
335                    combined.push('\n');
336                }
337                combined.push_str(&invocation);
338            }
339        }
340
341        let ext = &self.target().conventions.file_extension;
342        let out_path = format!("output.{ext}");
343        if let Some(sm) = &mut merged_map {
344            sm.generated_file = out_path.clone();
345        }
346        Ok(GeneratedCode {
347            files: vec![OutputFile {
348                path: PathBuf::from(out_path),
349                content: combined,
350            }],
351            source_map: merged_map,
352        })
353    }
354}
355
/// Counts the line-feed (`\n`) characters contained in `s`.
fn count_newlines(s: &str) -> usize {
    s.matches('\n').count()
}
359
360/// Returns true if the given AIR module declares a top-level function named
361/// `main`. Used by the build pipeline to decide whether to append an
362/// entry-point invocation to the generated output of targets without a
363/// native main convention.
364#[must_use]
365pub fn module_declares_main_fn(module: &AIRModule) -> bool {
366    let NodeKind::Module { items, .. } = &module.kind else {
367        return false;
368    };
369    items.iter().any(|item| {
370        matches!(
371            &item.kind,
372            NodeKind::FnDecl { name, .. } if name.name == "main"
373        )
374    })
375}
376
377/// Returns true if the given AIR module declares a top-level `async fn main`.
378/// Used by `generate_project` to select an async-aware entry invocation.
379#[must_use]
380pub fn module_main_fn_is_async(module: &AIRModule) -> bool {
381    let NodeKind::Module { items, .. } = &module.kind else {
382        return false;
383    };
384    items.iter().any(|item| {
385        matches!(
386            &item.kind,
387            NodeKind::FnDecl { name, is_async: true, .. } if name.name == "main"
388        )
389    })
390}
391
392// ─── Tests ───────────────────────────────────────────────────────────────────
393
#[cfg(test)]
mod tests {
    //! Unit tests for the output types, the source-map helpers and the
    //! `main`-detection functions.

    use super::*;

    use bock_air::AIRNode;
    use bock_ast::{Ident, Visibility};
    use bock_errors::{FileId, Span};

    // ── Output types ────────────────────────────────────────────────────────

    #[test]
    fn output_file_stores_path_and_content() {
        let f = OutputFile {
            path: PathBuf::from("main.js"),
            content: "console.log('hello');".into(),
        };
        assert_eq!(f.path, PathBuf::from("main.js"));
        assert!(f.content.contains("console.log"));
    }

    #[test]
    fn generated_code_with_no_source_map() {
        let code = GeneratedCode {
            files: vec![OutputFile {
                path: PathBuf::from("out.py"),
                content: "print('hello')".into(),
            }],
            source_map: None,
        };
        assert_eq!(code.files.len(), 1);
        assert!(code.source_map.is_none());
    }

    // ── Source maps ─────────────────────────────────────────────────────────

    #[test]
    fn source_map_default_is_empty() {
        let sm = SourceMap::default();
        assert!(sm.entries.is_empty());
        assert!(sm.mappings.is_empty());
        assert!(sm.sources.is_empty());
    }

    #[test]
    fn byte_to_line_col_basic() {
        let s = "abc\ndef\nghi";
        assert_eq!(byte_to_line_col(s, 0), (1, 1));
        assert_eq!(byte_to_line_col(s, 3), (1, 4));
        assert_eq!(byte_to_line_col(s, 4), (2, 1));
        assert_eq!(byte_to_line_col(s, 8), (3, 1));
    }

    #[test]
    fn resolve_positions_fills_line_col() {
        let mut sm = SourceMap {
            mappings: vec![SourceMapping {
                gen_line: 1,
                gen_col: 1,
                src_line: 0,
                src_col: 0,
                src_offset: 4,
                src_file_id: 0,
            }],
            ..Default::default()
        };
        sm.resolve_positions(&["abc\ndef"]);
        assert_eq!(sm.mappings[0].src_line, 2);
        assert_eq!(sm.mappings[0].src_col, 1);
    }

    #[test]
    fn resolve_positions_skips_unknown_file_id() {
        let mut sm = SourceMap {
            mappings: vec![SourceMapping {
                gen_line: 1,
                gen_col: 1,
                src_line: 0,
                src_col: 0,
                src_offset: 4,
                src_file_id: 3,
            }],
            ..Default::default()
        };
        sm.resolve_positions(&["abc\ndef"]);
        assert_eq!(sm.mappings[0].src_line, 0);
        assert_eq!(sm.mappings[0].src_col, 0);
    }

    #[test]
    fn vlq_encodes_known_values() {
        // Source Map v3 VLQ reference values.
        let mut s = String::new();
        vlq_encode(&mut s, 0);
        assert_eq!(s, "A");
        s.clear();
        vlq_encode(&mut s, 1);
        assert_eq!(s, "C");
        s.clear();
        vlq_encode(&mut s, -1);
        assert_eq!(s, "D");
        s.clear();
        vlq_encode(&mut s, 16);
        assert_eq!(s, "gB");
    }

    #[test]
    fn escape_json_escapes_special_characters() {
        assert_eq!(escape_json("a\"b\\c"), "a\\\"b\\\\c");
        assert_eq!(escape_json("x\ny\tz\r"), "x\\ny\\tz\\r");
        assert_eq!(escape_json("\u{08}\u{0C}\u{01}"), "\\b\\f\\u0001");
        assert_eq!(escape_json("plain"), "plain");
    }

    #[test]
    fn encode_vlq_mappings_separates_lines_and_skips_unresolved() {
        let mappings = vec![
            SourceMapping {
                gen_line: 2,
                gen_col: 3,
                src_line: 2,
                src_col: 5,
                src_offset: 0,
                src_file_id: 0,
            },
            // Unresolved (src_line == 0): must not appear in the output.
            SourceMapping {
                gen_line: 1,
                gen_col: 9,
                src_line: 0,
                src_col: 0,
                src_offset: 0,
                src_file_id: 0,
            },
            SourceMapping {
                gen_line: 1,
                gen_col: 1,
                src_line: 1,
                src_col: 1,
                src_offset: 0,
                src_file_id: 0,
            },
        ];
        assert_eq!(encode_vlq_mappings(&mappings), "AAAA;EACI");
        assert_eq!(encode_vlq_mappings(&[]), "");
    }

    #[test]
    fn source_map_v3_json_contains_required_fields() {
        let sm = SourceMap {
            generated_file: "output.js".into(),
            sources: vec![SourceInfo {
                path: "main.bock".into(),
                content: Some("let x = 1\n".into()),
            }],
            mappings: vec![SourceMapping {
                gen_line: 1,
                gen_col: 1,
                src_line: 1,
                src_col: 1,
                src_offset: 0,
                src_file_id: 0,
            }],
            ..Default::default()
        };
        let json = sm.to_source_map_v3_json();
        assert!(json.contains("\"version\":3"));
        assert!(json.contains("\"file\":\"output.js\""));
        assert!(json.contains("\"sources\":[\"main.bock\"]"));
        assert!(json.contains("\"sourcesContent\":[\"let x = 1\\n\"]"));
        assert!(json.contains("\"mappings\":\"AAAA\""));
    }

    // ── module_declares_main_fn / module_main_fn_is_async ───────────────────

    fn dummy_span() -> Span {
        Span {
            file: FileId(0),
            start: 0,
            end: 0,
        }
    }

    fn ident(name: &str) -> Ident {
        Ident {
            name: name.to_string(),
            span: dummy_span(),
        }
    }

    /// Builds a public, parameterless function declaration with an empty body.
    fn fn_decl_with(name: &str, is_async: bool) -> AIRNode {
        let body = AIRNode::new(
            1,
            dummy_span(),
            NodeKind::Block {
                stmts: vec![],
                tail: None,
            },
        );
        AIRNode::new(
            0,
            dummy_span(),
            NodeKind::FnDecl {
                annotations: vec![],
                visibility: Visibility::Public,
                is_async,
                name: ident(name),
                generic_params: vec![],
                params: vec![],
                return_type: None,
                effect_clause: vec![],
                where_clause: vec![],
                body: Box::new(body),
            },
        )
    }

    fn fn_decl(name: &str) -> AIRNode {
        fn_decl_with(name, false)
    }

    fn module_with(items: Vec<AIRNode>) -> AIRNode {
        AIRNode::new(
            0,
            dummy_span(),
            NodeKind::Module {
                path: None,
                annotations: vec![],
                imports: vec![],
                items,
            },
        )
    }

    #[test]
    fn module_declares_main_detects_top_level_main() {
        let m = module_with(vec![fn_decl("helper"), fn_decl("main")]);
        assert!(module_declares_main_fn(&m));
    }

    #[test]
    fn module_declares_main_returns_false_when_absent() {
        let m = module_with(vec![fn_decl("helper"), fn_decl("other")]);
        assert!(!module_declares_main_fn(&m));
    }

    #[test]
    fn module_declares_main_returns_false_for_empty_module() {
        let m = module_with(vec![]);
        assert!(!module_declares_main_fn(&m));
    }

    #[test]
    fn module_main_is_async_detects_async_main() {
        let m = module_with(vec![fn_decl("helper"), fn_decl_with("main", true)]);
        assert!(module_main_fn_is_async(&m));
        assert!(module_declares_main_fn(&m));
    }

    #[test]
    fn module_main_is_async_returns_false_for_sync_main() {
        let m = module_with(vec![fn_decl("main")]);
        assert!(!module_main_fn_is_async(&m));
    }

    #[test]
    fn module_main_is_async_ignores_other_async_functions() {
        let m = module_with(vec![fn_decl_with("helper", true), fn_decl("main")]);
        assert!(!module_main_fn_is_async(&m));
    }
}