oxc_codegen/
lib.rs

1//! Oxc Codegen
2//!
3//! Code adapted from
4//! * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/internal/js_printer/js_printer.go)
5#![warn(missing_docs)]
6
7mod binary_expr_visitor;
8mod comment;
9mod context;
10mod r#gen;
11mod operator;
12mod options;
13mod sourcemap_builder;
14mod str;
15
16use std::borrow::Cow;
17
18use oxc_ast::ast::*;
19use oxc_data_structures::{code_buffer::CodeBuffer, stack::Stack};
20use oxc_semantic::Scoping;
21use oxc_span::{GetSpan, SPAN, Span};
22use oxc_syntax::{
23    identifier::{is_identifier_part, is_identifier_part_ascii},
24    operator::{BinaryOperator, UnaryOperator, UpdateOperator},
25    precedence::Precedence,
26};
27
28use crate::{
29    binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
30    sourcemap_builder::SourcemapBuilder, str::Quote,
31};
32pub use crate::{
33    context::Context,
34    r#gen::{Gen, GenExpr},
35    options::{CodegenOptions, LegalComment},
36};
37
38/// Output from [`Codegen::build`]
39#[non_exhaustive]
40pub struct CodegenReturn {
41    /// The generated source code.
42    pub code: String,
43
44    /// The source map from the input source code to the generated source code.
45    ///
46    /// You must set [`CodegenOptions::source_map_path`] for this to be [`Some`].
47    pub map: Option<oxc_sourcemap::SourceMap>,
48
49    /// All the legal comments returned from [LegalComment::Linked] or [LegalComment::External].
50    pub legal_comments: Vec<Comment>,
51}
52
53/// A code generator for printing JavaScript and TypeScript code.
54///
55/// ## Example
56/// ```rust
57/// use oxc_codegen::{Codegen, CodegenOptions};
58/// use oxc_ast::ast::Program;
59/// use oxc_parser::Parser;
60/// use oxc_allocator::Allocator;
61/// use oxc_span::SourceType;
62///
63/// let allocator = Allocator::default();
64/// let source = "const a = 1 + 2;";
65/// let parsed = Parser::new(&allocator, source, SourceType::mjs()).parse();
66/// assert!(parsed.errors.is_empty());
67///
68/// let js = Codegen::new().build(&parsed.program);
69/// assert_eq!(js.code, "const a = 1 + 2;\n");
70/// ```
71pub struct Codegen<'a> {
72    pub(crate) options: CodegenOptions,
73
74    /// Original source code of the AST
75    source_text: Option<&'a str>,
76
77    scoping: Option<Scoping>,
78
79    /// Output Code
80    code: CodeBuffer,
81
82    // states
83    prev_op_end: usize,
84    prev_reg_exp_end: usize,
85    need_space_before_dot: usize,
86    print_next_indent_as_space: bool,
87    binary_expr_stack: Stack<BinaryExpressionVisitor<'a>>,
88    /// Indicates the output is JSX type, it is set in [`Program::gen`] and the result
89    /// is obtained by [`oxc_span::SourceType::is_jsx`]
90    is_jsx: bool,
91
92    /// For avoiding `;` if the previous statement ends with `}`.
93    needs_semicolon: bool,
94
95    prev_op: Option<Operator>,
96
97    start_of_stmt: usize,
98    start_of_arrow_expr: usize,
99    start_of_default_export: usize,
100
101    /// Track the current indentation level
102    indent: u32,
103
104    /// Fast path for [CodegenOptions::single_quote]
105    quote: Quote,
106
107    // Builders
108    comments: CommentsMap,
109
110    sourcemap_builder: Option<SourcemapBuilder>,
111}
112
113impl Default for Codegen<'_> {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl<'a> From<Codegen<'a>> for String {
120    fn from(val: Codegen<'a>) -> Self {
121        val.into_source_text()
122    }
123}
124
125impl<'a> From<Codegen<'a>> for Cow<'a, str> {
126    fn from(val: Codegen<'a>) -> Self {
127        Cow::Owned(val.into_source_text())
128    }
129}
130
131// Public APIs
132impl<'a> Codegen<'a> {
133    /// Create a new code generator.
134    ///
135    /// This is equivalent to [`Codegen::default`].
136    #[must_use]
137    pub fn new() -> Self {
138        let options = CodegenOptions::default();
139        Self {
140            options,
141            source_text: None,
142            scoping: None,
143            code: CodeBuffer::default(),
144            needs_semicolon: false,
145            need_space_before_dot: 0,
146            print_next_indent_as_space: false,
147            binary_expr_stack: Stack::with_capacity(12),
148            prev_op_end: 0,
149            prev_reg_exp_end: 0,
150            prev_op: None,
151            start_of_stmt: 0,
152            start_of_arrow_expr: 0,
153            start_of_default_export: 0,
154            is_jsx: false,
155            indent: 0,
156            quote: Quote::Double,
157            comments: CommentsMap::default(),
158            sourcemap_builder: None,
159        }
160    }
161
162    /// Pass options to the code generator.
163    #[must_use]
164    pub fn with_options(mut self, options: CodegenOptions) -> Self {
165        self.quote = if options.single_quote { Quote::Single } else { Quote::Double };
166        self.options = options;
167        self
168    }
169
170    /// Sets the source text for the code generator.
171    #[must_use]
172    pub fn with_source_text(mut self, source_text: &'a str) -> Self {
173        self.source_text = Some(source_text);
174        self
175    }
176
177    /// Set the symbol table used for identifier renaming.
178    ///
179    /// Can be used for easy renaming of variables (based on semantic analysis).
180    #[must_use]
181    pub fn with_scoping(mut self, scoping: Option<Scoping>) -> Self {
182        self.scoping = scoping;
183        self
184    }
185
186    /// Print a [`Program`] into a string of source code.
187    ///
188    /// A source map will be generated if [`CodegenOptions::source_map_path`] is set.
189    #[must_use]
190    pub fn build(mut self, program: &Program<'a>) -> CodegenReturn {
191        self.quote = if self.options.single_quote { Quote::Single } else { Quote::Double };
192        self.source_text = Some(program.source_text);
193        self.code.reserve(program.source_text.len());
194        self.build_comments(&program.comments);
195        if let Some(path) = &self.options.source_map_path {
196            self.sourcemap_builder = Some(SourcemapBuilder::new(path, program.source_text));
197        }
198        program.print(&mut self, Context::default());
199        let legal_comments = self.handle_eof_linked_or_external_comments(program);
200        let code = self.code.into_string();
201        let map = self.sourcemap_builder.map(SourcemapBuilder::into_sourcemap);
202        CodegenReturn { code, map, legal_comments }
203    }
204
205    /// Turn what's been built so far into a string. Like [`build`],
206    /// this fininishes a print and returns the generated source code. Unlike
207    /// [`build`], no source map is generated.
208    ///
209    /// This is more useful for cases that progressively build code using [`print_expression`].
210    ///
211    /// [`build`]: Codegen::build
212    /// [`print_expression`]: Codegen::print_expression
213    #[must_use]
214    pub fn into_source_text(self) -> String {
215        self.code.into_string()
216    }
217
218    /// Push a single ASCII byte into the buffer.
219    ///
220    /// # Panics
221    /// Panics if `byte` is not an ASCII byte (`0 - 0x7F`).
222    #[inline]
223    pub fn print_ascii_byte(&mut self, byte: u8) {
224        self.code.print_ascii_byte(byte);
225    }
226
227    /// Push str into the buffer
228    #[inline]
229    pub fn print_str(&mut self, s: &str) {
230        self.code.print_str(s);
231    }
232
233    /// Print a single [`Expression`], adding it to the code generator's
234    /// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
235    #[inline]
236    pub fn print_expression(&mut self, expr: &Expression<'_>) {
237        expr.print_expr(self, Precedence::Lowest, Context::empty());
238    }
239}
240
241// Private APIs
242impl<'a> Codegen<'a> {
243    fn code(&self) -> &CodeBuffer {
244        &self.code
245    }
246
247    fn code_len(&self) -> usize {
248        self.code().len()
249    }
250
251    #[inline]
252    fn print_soft_space(&mut self) {
253        if !self.options.minify {
254            self.print_ascii_byte(b' ');
255        }
256    }
257
258    #[inline]
259    fn print_hard_space(&mut self) {
260        self.print_ascii_byte(b' ');
261    }
262
263    #[inline]
264    fn print_soft_newline(&mut self) {
265        if !self.options.minify {
266            self.print_ascii_byte(b'\n');
267        }
268    }
269
270    #[inline]
271    fn print_hard_newline(&mut self) {
272        self.print_ascii_byte(b'\n');
273    }
274
275    #[inline]
276    fn print_semicolon(&mut self) {
277        self.print_ascii_byte(b';');
278    }
279
280    #[inline]
281    fn print_comma(&mut self) {
282        self.print_ascii_byte(b',');
283    }
284
285    #[inline]
286    fn print_space_before_identifier(&mut self) {
287        let Some(byte) = self.last_byte() else { return };
288
289        if self.prev_reg_exp_end != self.code.len() {
290            let is_identifier = if byte.is_ascii() {
291                // Fast path for ASCII (very common case)
292                is_identifier_part_ascii(byte as char)
293            } else {
294                is_identifier_part(self.last_char().unwrap())
295            };
296            if !is_identifier {
297                return;
298            }
299        }
300
301        self.print_hard_space();
302    }
303
304    #[inline]
305    fn last_byte(&self) -> Option<u8> {
306        self.code.last_byte()
307    }
308
309    #[inline]
310    fn last_char(&self) -> Option<char> {
311        self.code.last_char()
312    }
313
314    #[inline]
315    fn indent(&mut self) {
316        if !self.options.minify {
317            self.indent += 1;
318        }
319    }
320
321    #[inline]
322    fn dedent(&mut self) {
323        if !self.options.minify {
324            self.indent -= 1;
325        }
326    }
327
328    #[inline]
329    fn wrap<F: FnMut(&mut Self)>(&mut self, wrap: bool, mut f: F) {
330        if wrap {
331            self.print_ascii_byte(b'(');
332        }
333        f(self);
334        if wrap {
335            self.print_ascii_byte(b')');
336        }
337    }
338
339    #[inline]
340    fn print_indent(&mut self) {
341        if self.options.minify {
342            return;
343        }
344        if self.print_next_indent_as_space {
345            self.print_hard_space();
346            self.print_next_indent_as_space = false;
347            return;
348        }
349        self.code.print_indent(self.indent as usize);
350    }
351
352    #[inline]
353    fn print_semicolon_after_statement(&mut self) {
354        if self.options.minify {
355            self.needs_semicolon = true;
356        } else {
357            self.print_str(";\n");
358        }
359    }
360
361    #[inline]
362    fn print_semicolon_if_needed(&mut self) {
363        if self.needs_semicolon {
364            self.print_semicolon();
365            self.needs_semicolon = false;
366        }
367    }
368
369    #[inline]
370    fn print_ellipsis(&mut self) {
371        self.print_str("...");
372    }
373
374    #[inline]
375    fn print_colon(&mut self) {
376        self.print_ascii_byte(b':');
377    }
378
379    #[inline]
380    fn print_equal(&mut self) {
381        self.print_ascii_byte(b'=');
382    }
383
384    fn print_curly_braces<F: FnOnce(&mut Self)>(&mut self, span: Span, single_line: bool, op: F) {
385        self.add_source_mapping(span);
386        self.print_ascii_byte(b'{');
387        if !single_line {
388            self.print_soft_newline();
389            self.indent();
390        }
391        op(self);
392        if !single_line {
393            self.dedent();
394            self.print_indent();
395        }
396        self.add_source_mapping_end(span);
397        self.print_ascii_byte(b'}');
398    }
399
400    fn print_block_start(&mut self, span: Span) {
401        self.add_source_mapping(span);
402        self.print_ascii_byte(b'{');
403        self.print_soft_newline();
404        self.indent();
405    }
406
407    fn print_block_end(&mut self, span: Span) {
408        self.dedent();
409        self.print_indent();
410        self.add_source_mapping_end(span);
411        self.print_ascii_byte(b'}');
412    }
413
414    fn print_body(&mut self, stmt: &Statement<'_>, need_space: bool, ctx: Context) {
415        match stmt {
416            Statement::BlockStatement(stmt) => {
417                self.print_soft_space();
418                self.print_block_statement(stmt, ctx);
419                self.print_soft_newline();
420            }
421            Statement::EmptyStatement(_) => {
422                self.print_semicolon();
423                self.print_soft_newline();
424            }
425            stmt => {
426                if need_space && self.options.minify {
427                    self.print_hard_space();
428                }
429                self.print_next_indent_as_space = true;
430                stmt.print(self, ctx);
431            }
432        }
433    }
434
435    fn print_block_statement(&mut self, stmt: &BlockStatement<'_>, ctx: Context) {
436        self.print_curly_braces(stmt.span, stmt.body.is_empty(), |p| {
437            for stmt in &stmt.body {
438                p.print_semicolon_if_needed();
439                stmt.print(p, ctx);
440            }
441        });
442        self.needs_semicolon = false;
443    }
444
445    fn print_directives_and_statements(
446        &mut self,
447        directives: &[Directive<'_>],
448        stmts: &[Statement<'_>],
449        ctx: Context,
450    ) {
451        for directive in directives {
452            directive.print(self, ctx);
453        }
454        let Some((first, rest)) = stmts.split_first() else {
455            return;
456        };
457
458        // Ensure first string literal is not a directive.
459        let mut first_needs_parens = false;
460        if directives.is_empty() && !self.options.minify {
461            if let Statement::ExpressionStatement(s) = first {
462                let s = s.expression.without_parentheses();
463                if matches!(s, Expression::StringLiteral(_)) {
464                    first_needs_parens = true;
465                    self.print_ascii_byte(b'(');
466                    s.print_expr(self, Precedence::Lowest, ctx);
467                    self.print_ascii_byte(b')');
468                    self.print_semicolon_after_statement();
469                }
470            }
471        }
472
473        if !first_needs_parens {
474            first.print(self, ctx);
475        }
476
477        for stmt in rest {
478            self.print_semicolon_if_needed();
479            stmt.print(self, ctx);
480        }
481    }
482
483    #[inline]
484    fn print_list<T: Gen>(&mut self, items: &[T], ctx: Context) {
485        let Some((first, rest)) = items.split_first() else {
486            return;
487        };
488        first.print(self, ctx);
489        for item in rest {
490            self.print_comma();
491            self.print_soft_space();
492            item.print(self, ctx);
493        }
494    }
495
496    #[inline]
497    fn print_expressions<T: GenExpr>(&mut self, items: &[T], precedence: Precedence, ctx: Context) {
498        let Some((first, rest)) = items.split_first() else {
499            return;
500        };
501        first.print_expr(self, precedence, ctx);
502        for item in rest {
503            self.print_comma();
504            self.print_soft_space();
505            item.print_expr(self, precedence, ctx);
506        }
507    }
508
509    fn print_arguments(&mut self, span: Span, arguments: &[Argument<'_>], ctx: Context) {
510        self.print_ascii_byte(b'(');
511
512        let has_comment_before_right_paren = span.end > 0 && self.has_comment(span.end - 1);
513
514        let has_comment = has_comment_before_right_paren
515            || arguments.iter().any(|item| self.has_comment(item.span().start));
516
517        if has_comment {
518            self.indent();
519            self.print_list_with_comments(arguments, ctx);
520            // Handle `/* comment */);`
521            if !has_comment_before_right_paren
522                || (span.end > 0 && !self.print_expr_comments(span.end - 1))
523            {
524                self.print_soft_newline();
525            }
526            self.dedent();
527            self.print_indent();
528        } else {
529            self.print_list(arguments, ctx);
530        }
531        self.print_ascii_byte(b')');
532    }
533
534    fn print_list_with_comments(&mut self, items: &[Argument<'_>], ctx: Context) {
535        let Some((first, rest)) = items.split_first() else {
536            return;
537        };
538        if self.print_expr_comments(first.span().start) {
539            self.print_indent();
540        } else {
541            self.print_soft_newline();
542            self.print_indent();
543        }
544        first.print(self, ctx);
545        for item in rest {
546            self.print_comma();
547            if self.print_expr_comments(item.span().start) {
548                self.print_indent();
549            } else {
550                self.print_soft_newline();
551                self.print_indent();
552            }
553            item.print(self, ctx);
554        }
555    }
556
557    fn get_identifier_reference_name(&self, reference: &IdentifierReference<'a>) -> &'a str {
558        if let Some(scoping) = &self.scoping {
559            if let Some(reference_id) = reference.reference_id.get() {
560                if let Some(name) = scoping.get_reference_name(reference_id) {
561                    // SAFETY: Hack the lifetime to be part of the allocator.
562                    return unsafe { std::mem::transmute_copy(&name) };
563                }
564            }
565        }
566        reference.name.as_str()
567    }
568
569    fn get_binding_identifier_name(&self, ident: &BindingIdentifier<'a>) -> &'a str {
570        if let Some(scoping) = &self.scoping {
571            if let Some(symbol_id) = ident.symbol_id.get() {
572                let name = scoping.symbol_name(symbol_id);
573                // SAFETY: Hack the lifetime to be part of the allocator.
574                return unsafe { std::mem::transmute_copy(&name) };
575            }
576        }
577        ident.name.as_str()
578    }
579
580    fn print_space_before_operator(&mut self, next: Operator) {
581        if self.prev_op_end != self.code.len() {
582            return;
583        }
584        let Some(prev) = self.prev_op else { return };
585        // "+ + y" => "+ +y"
586        // "+ ++ y" => "+ ++y"
587        // "x + + y" => "x+ +y"
588        // "x ++ + y" => "x+++y"
589        // "x + ++ y" => "x+ ++y"
590        // "-- >" => "-- >"
591        // "< ! --" => "<! --"
592        let bin_op_add = Operator::Binary(BinaryOperator::Addition);
593        let bin_op_sub = Operator::Binary(BinaryOperator::Subtraction);
594        let un_op_pos = Operator::Unary(UnaryOperator::UnaryPlus);
595        let un_op_pre_inc = Operator::Update(UpdateOperator::Increment);
596        let un_op_neg = Operator::Unary(UnaryOperator::UnaryNegation);
597        let un_op_pre_dec = Operator::Update(UpdateOperator::Decrement);
598        let un_op_post_dec = Operator::Update(UpdateOperator::Decrement);
599        let bin_op_gt = Operator::Binary(BinaryOperator::GreaterThan);
600        let un_op_not = Operator::Unary(UnaryOperator::LogicalNot);
601        if ((prev == bin_op_add || prev == un_op_pos)
602            && (next == bin_op_add || next == un_op_pos || next == un_op_pre_inc))
603            || ((prev == bin_op_sub || prev == un_op_neg)
604                && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
605            || (prev == un_op_post_dec && next == bin_op_gt)
606            || (prev == un_op_not
607                && next == un_op_pre_dec
608                // `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
609                // and therefore previous character is 1 byte from end of buffer
610                && self.code.peek_nth_byte_back(1) == Some(b'<'))
611        {
612            self.print_hard_space();
613        }
614    }
615
616    fn print_non_negative_float(&mut self, num: f64) {
617        // Inline the buffer here to avoid heap allocation on `buffer.format(*self).to_string()`.
618        let mut buffer = ryu_js::Buffer::new();
619        if num < 1000.0 && num.fract() == 0.0 {
620            self.print_str(buffer.format(num));
621            self.need_space_before_dot = self.code_len();
622        } else {
623            let s = Self::get_minified_number(num, &mut buffer);
624            self.print_str(&s);
625            if !s.bytes().any(|b| matches!(b, b'.' | b'e' | b'x')) {
626                self.need_space_before_dot = self.code_len();
627            }
628        }
629    }
630
631    // `get_minified_number` from terser
632    // https://github.com/terser/terser/blob/c5315c3fd6321d6b2e076af35a70ef532f498505/lib/output.js#L2418
633    #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_possible_wrap)]
634    fn get_minified_number(num: f64, buffer: &mut ryu_js::Buffer) -> Cow<'_, str> {
635        use cow_utils::CowUtils;
636
637        if num < 1000.0 && num.fract() == 0.0 {
638            return Cow::Borrowed(buffer.format(num));
639        }
640
641        let mut s = buffer.format(num);
642
643        if s.starts_with("0.") {
644            s = &s[1..];
645        }
646
647        let s = s.cow_replacen("e+", "e", 1);
648
649        let mut candidates = vec![s.clone()];
650
651        if num.fract() == 0.0 {
652            candidates.push(Cow::Owned(format!("0x{:x}", num as u128)));
653        }
654
655        // create `1e-2`
656        if s.starts_with(".0") {
657            if let Some((i, _)) = s[1..].bytes().enumerate().find(|(_, c)| *c != b'0') {
658                let len = i + 1; // `+1` to include the dot.
659                let digits = &s[len..];
660                candidates.push(Cow::Owned(format!("{digits}e-{}", digits.len() + len - 1)));
661            }
662        }
663
664        // create 1e2
665        if s.ends_with('0') {
666            if let Some((len, _)) = s.bytes().rev().enumerate().find(|(_, c)| *c != b'0') {
667                candidates.push(Cow::Owned(format!("{}e{len}", &s[0..s.len() - len])));
668            }
669        }
670
671        // `1.2e101` -> ("1", "2", "101")
672        // `1.3415205933077406e300` -> `13415205933077406e284;`
673        if let Some((integer, point, exponent)) =
674            s.split_once('.').and_then(|(a, b)| b.split_once('e').map(|e| (a, e.0, e.1)))
675        {
676            candidates.push(Cow::Owned(format!(
677                "{integer}{point}e{}",
678                exponent.parse::<isize>().unwrap() - point.len() as isize
679            )));
680        }
681
682        candidates.into_iter().min_by_key(|c| c.len()).unwrap()
683    }
684
685    fn add_source_mapping(&mut self, span: Span) {
686        if span == SPAN {
687            return;
688        }
689        if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() {
690            sourcemap_builder.add_source_mapping(self.code.as_bytes(), span.start, None);
691        }
692    }
693
694    fn add_source_mapping_end(&mut self, span: Span) {
695        if span == SPAN {
696            return;
697        }
698        if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() {
699            sourcemap_builder.add_source_mapping(self.code.as_bytes(), span.end, None);
700        }
701    }
702
703    fn add_source_mapping_for_name(&mut self, span: Span, name: &str) {
704        if span == SPAN {
705            return;
706        }
707        if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut() {
708            sourcemap_builder.add_source_mapping_for_name(self.code.as_bytes(), span, name);
709        }
710    }
711}