Skip to main content

probar_js_gen/
hir.rs

//! High-level Intermediate Representation (HIR) for JavaScript.
//!
//! # Design Principles
//!
//! 1. **Type Safety**: All JS constructs have typed Rust equivalents
//! 2. **Validation**: Invalid JS is unrepresentable in the type system
//! 3. **Determinism**: The same HIR always produces the same JS output
//!
//! # References
//! - Maffeis et al. (2008) "An Operational Semantics for JavaScript"
//! - Guha et al. (2010) "The Essence of JavaScript"
//! - ECMA-262 (ES2022) Specification
13
14use serde::{Deserialize, Serialize};
15
16/// A complete JavaScript module.
17///
18/// Modules are the top-level unit of code generation.
19/// They contain statements and track generation metadata.
20#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
21pub struct JsModule {
22    /// Module-level statements
23    pub statements: Vec<Stmt>,
24    /// Generation metadata (tool version, hash, etc.)
25    pub metadata: Option<GenerationMetadata>,
26}
27
28impl Default for JsModule {
29    fn default() -> Self {
30        Self::new()
31    }
32}
33
34impl JsModule {
35    /// Create a new empty module.
36    #[must_use]
37    pub const fn new() -> Self {
38        Self {
39            statements: Vec::new(),
40            metadata: None,
41        }
42    }
43}
44
45/// Metadata about code generation for immutability enforcement.
46#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
47pub struct GenerationMetadata {
48    /// Tool that generated this code
49    pub tool: String,
50    /// Tool version
51    pub version: String,
52    /// Blake3 hash of input specification
53    pub input_hash: String,
54    /// ISO 8601 timestamp
55    pub timestamp: String,
56    /// Command to regenerate
57    pub regenerate_cmd: String,
58}
59
60/// JavaScript statement.
61///
62/// Each variant maps to a specific JS statement type.
63/// Invalid combinations are prevented at compile time.
64#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
65pub enum Stmt {
66    /// Variable declaration: `let name = expr;`
67    Let {
68        /// Variable name
69        name: Identifier,
70        /// Initial value
71        value: Expr,
72    },
73    /// Constant declaration: `const name = expr;`
74    Const {
75        /// Constant name
76        name: Identifier,
77        /// Value
78        value: Expr,
79    },
80    /// Assignment: `name = expr;`
81    Assign {
82        /// Target name
83        name: Identifier,
84        /// New value
85        value: Expr,
86    },
87    /// Member assignment: `obj.member = value;`
88    MemberAssign {
89        /// Object expression
90        object: Expr,
91        /// Member name
92        member: Identifier,
93        /// New value
94        value: Expr,
95    },
96    /// Compound assignment: `target += value;`
97    AddAssign {
98        /// Target expression
99        target: Expr,
100        /// Value to add
101        value: Expr,
102    },
103    /// Post-increment: `expr++;`
104    PostIncrement(Expr),
105    /// Expression statement: `expr;`
106    Expr(Expr),
107    /// Return statement: `return expr;` or `return;`
108    Return(Option<Expr>),
109    /// If statement: `if (cond) { then } else { else }`
110    If {
111        /// Condition
112        condition: Expr,
113        /// Then branch
114        then_branch: Vec<Stmt>,
115        /// Optional else branch
116        else_branch: Option<Vec<Stmt>>,
117    },
118    /// For loop: `for (let i = start; i < end; i++) { body }`
119    For {
120        /// Loop variable
121        var: Identifier,
122        /// Start value
123        start: Expr,
124        /// End value (exclusive)
125        end: Expr,
126        /// Loop body
127        body: Vec<Stmt>,
128    },
129    /// While loop: `while (cond) { body }`
130    While {
131        /// Condition
132        condition: Expr,
133        /// Loop body
134        body: Vec<Stmt>,
135    },
136    /// Try-catch: `try { body } catch (e) { handler }`
137    TryCatch {
138        /// Try body
139        body: Vec<Stmt>,
140        /// Catch variable name
141        catch_var: Identifier,
142        /// Catch handler
143        handler: Vec<Stmt>,
144    },
145    /// Block: `{ stmts }`
146    Block(Vec<Stmt>),
147    /// Comment: `// text`
148    Comment(String),
149    /// Class definition
150    Class(JsClass),
151    /// Switch statement
152    Switch(JsSwitch),
153    /// `self.onmessage = async function(e) { body }`
154    OnMessage(Vec<Stmt>),
155    /// `registerProcessor(name, class)`
156    RegisterProcessor {
157        /// Processor name
158        name: String,
159        /// Class name
160        class: Identifier,
161    },
162}
163
164/// JavaScript expression.
165///
166/// All expression types that can appear in JS code.
167#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
168pub enum Expr {
169    /// Null literal
170    Null,
171    /// Boolean literal
172    Bool(bool),
173    /// Number literal
174    Num(f64),
175    /// String literal (will be properly escaped)
176    Str(String),
177    /// Identifier reference
178    Ident(Identifier),
179    /// `this` keyword
180    This,
181    /// Member access: `obj.prop`
182    Member {
183        /// Object
184        object: Box<Expr>,
185        /// Property name
186        property: Identifier,
187    },
188    /// Computed member: `obj[expr]`
189    Index {
190        /// Object
191        object: Box<Expr>,
192        /// Index expression
193        index: Box<Expr>,
194    },
195    /// Function call: `func(args)`
196    Call {
197        /// Function expression
198        callee: Box<Expr>,
199        /// Arguments
200        args: Vec<Expr>,
201    },
202    /// `new Constructor(args)`
203    New {
204        /// Constructor
205        constructor: Box<Expr>,
206        /// Arguments
207        args: Vec<Expr>,
208    },
209    /// `await expr`
210    Await(Box<Expr>),
211    /// `import(path)` - dynamic import
212    Import(Box<Expr>),
213    /// Binary operation: `left op right`
214    Binary {
215        /// Left operand
216        left: Box<Expr>,
217        /// Operator
218        op: BinOp,
219        /// Right operand
220        right: Box<Expr>,
221    },
222    /// Unary operation: `op expr`
223    Unary {
224        /// Operator
225        op: UnaryOp,
226        /// Operand
227        operand: Box<Expr>,
228    },
229    /// Ternary: `cond ? then : else`
230    Ternary {
231        /// Condition
232        condition: Box<Expr>,
233        /// Then expression
234        then_expr: Box<Expr>,
235        /// Else expression
236        else_expr: Box<Expr>,
237    },
238    /// Object literal: `{ key: value, ... }`
239    Object(Vec<(String, Expr)>),
240    /// Array literal: `[expr, ...]`
241    Array(Vec<Expr>),
242    /// Arrow function: `(params) => expr`
243    Arrow {
244        /// Parameters
245        params: Vec<Identifier>,
246        /// Body expression
247        body: Box<Expr>,
248    },
249    /// Arrow function with block: `(params) => { stmts }`
250    ArrowBlock {
251        /// Parameters
252        params: Vec<Identifier>,
253        /// Body statements
254        body: Vec<Stmt>,
255    },
256    /// Assignment expression: `left = right`
257    Assign {
258        /// Target
259        target: Box<Expr>,
260        /// Value
261        value: Box<Expr>,
262    },
263}
264
265/// Binary operators.
266#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
267pub enum BinOp {
268    /// Addition: `+`
269    Add,
270    /// Subtraction: `-`
271    Sub,
272    /// Multiplication: `*`
273    Mul,
274    /// Division: `/`
275    Div,
276    /// Modulo: `%`
277    Mod,
278    /// Equality: `==`
279    Eq,
280    /// Strict equality: `===`
281    EqStrict,
282    /// Inequality: `!=`
283    Ne,
284    /// Strict inequality: `!==`
285    NeStrict,
286    /// Less than: `<`
287    Lt,
288    /// Less than or equal: `<=`
289    Le,
290    /// Greater than: `>`
291    Gt,
292    /// Greater than or equal: `>=`
293    Ge,
294    /// Logical and: `&&`
295    And,
296    /// Logical or: `||`
297    Or,
298    /// Bitwise and: `&`
299    BitAnd,
300    /// Bitwise or: `|`
301    BitOr,
302}
303
304impl BinOp {
305    /// Get the JavaScript operator string.
306    #[must_use]
307    pub const fn as_str(self) -> &'static str {
308        match self {
309            Self::Add => "+",
310            Self::Sub => "-",
311            Self::Mul => "*",
312            Self::Div => "/",
313            Self::Mod => "%",
314            Self::Eq => "==",
315            Self::EqStrict => "===",
316            Self::Ne => "!=",
317            Self::NeStrict => "!==",
318            Self::Lt => "<",
319            Self::Le => "<=",
320            Self::Gt => ">",
321            Self::Ge => ">=",
322            Self::And => "&&",
323            Self::Or => "||",
324            Self::BitAnd => "&",
325            Self::BitOr => "|",
326        }
327    }
328}
329
330/// Unary operators.
331#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
332pub enum UnaryOp {
333    /// Logical not: `!`
334    Not,
335    /// Negation: `-`
336    Neg,
337    /// Type of: `typeof`
338    TypeOf,
339}
340
341impl UnaryOp {
342    /// Get the JavaScript operator string.
343    #[must_use]
344    pub const fn as_str(self) -> &'static str {
345        match self {
346            Self::Not => "!",
347            Self::Neg => "-",
348            Self::TypeOf => "typeof ",
349        }
350    }
351}
352
353/// A validated JavaScript identifier.
354///
355/// Identifiers are validated at construction time to ensure they:
356/// - Are not reserved words
357/// - Contain only valid characters
358/// - Don't start with a digit
359#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
360pub struct Identifier(String);
361
362impl Identifier {
363    /// JavaScript reserved words that cannot be used as identifiers.
364    pub const RESERVED_WORDS: &'static [&'static str] = &[
365        "break",
366        "case",
367        "catch",
368        "continue",
369        "debugger",
370        "default",
371        "delete",
372        "do",
373        "else",
374        "finally",
375        "for",
376        "function",
377        "if",
378        "in",
379        "instanceof",
380        "new",
381        "return",
382        "switch",
383        "this",
384        "throw",
385        "try",
386        "typeof",
387        "var",
388        "void",
389        "while",
390        "with",
391        "class",
392        "const",
393        "enum",
394        "export",
395        "extends",
396        "import",
397        "super",
398        "implements",
399        "interface",
400        "let",
401        "package",
402        "private",
403        "protected",
404        "public",
405        "static",
406        "yield",
407        "await",
408        "null",
409        "true",
410        "false",
411    ];
412
413    /// Create a new identifier, validating it.
414    ///
415    /// # Errors
416    ///
417    /// Returns an error if the identifier is:
418    /// - Empty
419    /// - A reserved word
420    /// - Contains invalid characters
421    /// - Starts with a digit
422    pub fn new(name: impl Into<String>) -> crate::Result<Self> {
423        let name = name.into();
424
425        if name.is_empty() {
426            return Err(crate::JsGenError::InvalidIdentifier {
427                name,
428                reason: "identifier cannot be empty".to_string(),
429            });
430        }
431
432        // Check first character
433        let first = name.chars().next().unwrap_or(' ');
434        if first.is_ascii_digit() {
435            return Err(crate::JsGenError::InvalidIdentifier {
436                name,
437                reason: "identifier cannot start with a digit".to_string(),
438            });
439        }
440
441        // Check all characters
442        for c in name.chars() {
443            if !c.is_ascii_alphanumeric() && c != '_' && c != '$' {
444                return Err(crate::JsGenError::InvalidIdentifier {
445                    name,
446                    reason: format!("invalid character '{c}'"),
447                });
448            }
449        }
450
451        // Check reserved words
452        if Self::RESERVED_WORDS.contains(&name.as_str()) {
453            return Err(crate::JsGenError::InvalidIdentifier {
454                name,
455                reason: "reserved word".to_string(),
456            });
457        }
458
459        Ok(Self(name))
460    }
461
462    /// Create an identifier without validation (for trusted input).
463    ///
464    /// # Safety
465    ///
466    /// This is not unsafe in the memory sense, but it bypasses validation.
467    /// Only use for identifiers known to be valid at compile time.
468    #[must_use]
469    pub fn new_unchecked(name: &'static str) -> Self {
470        Self(name.to_string())
471    }
472
473    /// Get the identifier string.
474    #[must_use]
475    pub fn as_str(&self) -> &str {
476        &self.0
477    }
478}
479
480impl std::fmt::Display for Identifier {
481    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
482        write!(f, "{}", self.0)
483    }
484}
485
486/// JavaScript class definition.
487#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
488pub struct JsClass {
489    /// Class name
490    pub name: Identifier,
491    /// Parent class (extends)
492    pub extends: Option<Identifier>,
493    /// Constructor body (super() is added automatically if extends is set)
494    pub constructor: Option<Vec<Stmt>>,
495    /// Methods
496    pub methods: Vec<JsMethod>,
497}
498
499/// JavaScript class method.
500#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
501pub struct JsMethod {
502    /// Method name
503    pub name: Identifier,
504    /// Parameters
505    pub params: Vec<Identifier>,
506    /// Method body
507    pub body: Vec<Stmt>,
508}
509
510/// JavaScript switch statement.
511#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
512pub struct JsSwitch {
513    /// Expression to switch on
514    pub expr: Expr,
515    /// Cases
516    pub cases: Vec<(Expr, Vec<Stmt>)>,
517    /// Default case
518    pub default: Option<Vec<Stmt>>,
519}
520
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Message of the error returned when `Identifier::new` rejects `name`.
    fn rejection_message(name: &str) -> String {
        Identifier::new(name).unwrap_err().to_string()
    }

    #[test]
    fn identifier_valid() {
        let accepted = ["foo", "_bar", "$baz", "foo123", "camelCase"];
        for name in &accepted {
            assert!(Identifier::new(*name).is_ok());
        }
    }

    #[test]
    fn identifier_invalid_reserved() {
        let message = rejection_message("class");
        assert!(message.contains("reserved word"));
    }

    #[test]
    fn identifier_invalid_starts_digit() {
        let message = rejection_message("123foo");
        assert!(message.contains("cannot start with a digit"));
    }

    #[test]
    fn identifier_invalid_empty() {
        let message = rejection_message("");
        assert!(message.contains("cannot be empty"));
    }

    #[test]
    fn identifier_invalid_chars() {
        let message = rejection_message("foo-bar");
        assert!(message.contains("invalid character"));
    }

    #[test]
    fn binop_as_str() {
        let expected = [
            (BinOp::Add, "+"),
            (BinOp::EqStrict, "==="),
            (BinOp::And, "&&"),
        ];
        for (op, token) in &expected {
            assert_eq!(op.as_str(), *token);
        }
    }

    #[test]
    fn unaryop_as_str() {
        let expected = [(UnaryOp::Not, "!"), (UnaryOp::TypeOf, "typeof ")];
        for (op, token) in &expected {
            assert_eq!(op.as_str(), *token);
        }
    }
}