Skip to main content

oxc_parser_napi/
raw_transfer.rs

1use std::{
2    mem::{self, ManuallyDrop},
3    ptr::{self, NonNull},
4    str,
5};
6
7use napi::{
8    Task,
9    bindgen_prelude::{AsyncTask, Uint8Array},
10};
11use napi_derive::napi;
12
13use oxc::{
14    allocator::{Allocator, FromIn, Vec as ArenaVec},
15    ast_visit::utf8_to_utf16::Utf8ToUtf16,
16    semantic::SemanticBuilder,
17};
18use oxc_napi::get_source_type;
19
20use crate::{
21    AstType, ParserOptions, get_ast_type, parse_impl,
22    raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE},
23    raw_transfer_types::{EcmaScriptModule, Error, RawTransferData, RawTransferMetadata},
24};
25
26// For raw transfer, use a buffer 2 GiB in size, with 4 GiB alignment.
27// This ensures that all 64-bit pointers have the same value in upper 32 bits,
28// so JS only needs to read the lower 32 bits to get an offset into the buffer.
29//
30// Buffer size only 2 GiB so 32-bit offsets don't have the highest bit set.
31// This is advantageous for 2 reasons:
32//
33// 1. V8 stores small integers ("SMI"s) inline, rather than on heap, which is more performant.
34//    But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
35//    So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
36// 2. JS bitwise operators work only on signed 32-bit integers, with 32nd bit as sign bit.
37//    So avoiding the 32nd bit being set enables using `>>` bitshift operator, which may be cheaper
38//    than `>>>`, without offsets being interpreted as negative.
39
/// Alignment (in bytes) which the arena allocator requires.
///
/// The start offset and size of the arena region, and the size and offset of the metadata block
/// at the end of the buffer, are all kept to multiples of this value.
const BUMP_ALIGN: usize = 16;
41
42/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
43///
44/// Does not check that the offset is within bounds of `buffer`.
45/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
46#[napi(skip_typescript)]
47#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
48pub fn get_buffer_offset(buffer: Uint8Array) -> u32 {
49    let buffer = &*buffer;
50    let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN;
51    #[expect(clippy::cast_possible_truncation)]
52    return offset as u32;
53}
54
55/// Parse AST into provided `Uint8Array` buffer, synchronously.
56///
57/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
58/// provided as `source_len`.
59///
60/// This function will parse the source, and write the AST into the buffer, starting at the end.
61///
62/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
63///
64/// Caller can deserialize data from the buffer on JS side.
65///
66/// # SAFETY
67///
68/// Caller must ensure:
69/// * Source text is written into start of the buffer.
70/// * Source text's UTF-8 byte length is `source_len`.
71/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
72///
73/// If source text is originally a JS string on JS side, and converted to a buffer with
74/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
75///
76/// # Panics
77///
78/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
79#[napi(skip_typescript)]
80#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
81pub unsafe fn parse_raw_sync(
82    filename: String,
83    mut buffer: Uint8Array,
84    source_len: u32,
85    options: Option<ParserOptions>,
86) {
87    // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function
88    // during the time this `&mut [u8]` exists
89    let buffer = unsafe { buffer.as_mut() };
90
91    // SAFETY: `parse_raw_impl` has same safety requirements as this function
92    unsafe { parse_raw_impl(&filename, buffer, source_len, options) };
93}
94
95/// Parse AST into provided `Uint8Array` buffer, asynchronously.
96///
97/// Note: This function can be slower than `parseRawSync` due to the overhead of spawning a thread.
98///
99/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
100/// provided as `source_len`.
101///
102/// This function will parse the source, and write the AST into the buffer, starting at the end.
103///
104/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
105///
106/// Caller can deserialize data from the buffer on JS side.
107///
108/// # SAFETY
109///
110/// Caller must ensure:
111/// * Source text is written into start of the buffer.
112/// * Source text's UTF-8 byte length is `source_len`.
113/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
114/// * Contents of buffer must not be mutated by caller until the `AsyncTask` returned by this
115///   function resolves.
116///
117/// If source text is originally a JS string on JS side, and converted to a buffer with
118/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
119///
120/// # Panics
121///
122/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
123#[napi(skip_typescript)]
124pub fn parse_raw(
125    filename: String,
126    buffer: Uint8Array,
127    source_len: u32,
128    options: Option<ParserOptions>,
129) -> AsyncTask<ResolveTask> {
130    AsyncTask::new(ResolveTask { filename, buffer, source_len, options })
131}
132
/// Task created by `parse_raw`, and wrapped in the `AsyncTask` it returns.
///
/// Holds the arguments passed to `parse_raw` until its `Task::compute` implementation
/// hands them to `parse_raw_impl`.
pub struct ResolveTask {
    /// Filename, as passed to `parse_raw`.
    filename: String,
    /// Buffer containing the source text at its start. The AST is written into this buffer.
    buffer: Uint8Array,
    /// Length of the source text in UTF-8 bytes.
    source_len: u32,
    /// Parser options. Taken (leaving `None`) when the task is computed.
    options: Option<ParserOptions>,
}
139
140#[napi]
141impl Task for ResolveTask {
142    type JsValue = ();
143    type Output = ();
144
145    fn compute(&mut self) -> napi::Result<()> {
146        // SAFETY: Caller of `parse_async` guarantees not to mutate the contents of buffer
147        // between calling `parse_async` and the `AsyncTask` it returns resolving.
148        // Therefore, this is a valid exclusive `&mut [u8]`.
149        let buffer = unsafe { self.buffer.as_mut() };
150        // SAFETY: Caller of `parse_async` guarantees to uphold invariants of `parse_raw_impl`
151        unsafe { parse_raw_impl(&self.filename, buffer, self.source_len, self.options.take()) };
152        Ok(())
153    }
154
155    fn resolve(&mut self, _: napi::Env, _result: ()) -> napi::Result<()> {
156        Ok(())
157    }
158}
159
/// Parse AST into buffer.
///
/// Layout of `buffer` once this function has returned:
/// * Source text at the start (not modified by this function).
/// * Region managed by the arena `Allocator`, starting at `source_len` rounded up to a multiple
///   of `BUMP_ALIGN`, and ending where the metadata starts. `RawTransferData` (program, comments,
///   module record, errors) is allocated within this region.
/// * `RawTransferMetadata` in the last `size_of::<RawTransferMetadata>()` bytes of the buffer.
///
/// # SAFETY
///
/// Caller must ensure:
/// * Source text is written into start of the buffer.
/// * Source text's UTF-8 byte length is `source_len`.
/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
///
/// If source text is originally a JS string on JS side, and converted to a buffer with
/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
///
/// # Panics
///
/// Panics if:
/// * `buffer` is not exactly `BUFFER_SIZE` bytes long.
/// * Start of `buffer` is not aligned on `BUFFER_ALIGN`.
/// * Space remaining for the arena after the source text is less than `Allocator::RAW_MIN_SIZE`
///   (panic message "Source text is too long").
// `items_after_statements` is allowed because `const`s are declared next to the code using them.
#[allow(clippy::items_after_statements, clippy::allow_attributes)]
unsafe fn parse_raw_impl(
    filename: &str,
    buffer: &mut [u8],
    source_len: u32,
    options: Option<ParserOptions>,
) {
    // Check buffer has expected size and alignment
    assert_eq!(buffer.len(), BUFFER_SIZE);
    let buffer_ptr = ptr::from_mut(buffer).cast::<u8>();
    assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN));

    // Get offsets and size of data region to be managed by arena allocator.
    // Leave space for source before it, and space for metadata after it.
    // Metadata actually only takes 5 bytes, but round everything up to multiple of 16,
    // as the arena allocator requires that alignment.
    const RAW_METADATA_SIZE: usize = size_of::<RawTransferMetadata>();
    // Compile-time checks that `RawTransferMetadata` is padded out to a multiple of `BUMP_ALIGN`
    const {
        assert!(RAW_METADATA_SIZE >= BUMP_ALIGN);
        assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN));
    };
    let source_len = source_len as usize;
    let data_offset = source_len.next_multiple_of(BUMP_ALIGN);
    // `saturating_sub` so that an over-long source produces 0 (and fails the assertion below),
    // rather than the subtraction overflowing
    let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset);
    assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long");

    // Create `Allocator`.
    // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic.
    // SAFETY: `data_offset` is less than `buffer.len()` (guaranteed by the assertion on
    // `data_size` above), so `.add(data_offset)` cannot wrap or be out of bounds.
    let data_ptr = unsafe { buffer_ptr.add(data_offset) };
    debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN));
    debug_assert!(data_size.is_multiple_of(BUMP_ALIGN));
    // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`.
    // `data_ptr` and `data_size` are multiples of 16.
    // `data_size` is at least `Allocator::RAW_MIN_SIZE` (asserted above).
    // `data_ptr` is derived from a slice pointer, so is non-null.
    let allocator =
        unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) };
    let allocator = ManuallyDrop::new(allocator);

    // Resolve options, source type, and AST type
    let options = options.unwrap_or_default();
    let source_type =
        get_source_type(filename, options.lang.as_deref(), options.source_type.as_deref());
    let ast_type = get_ast_type(source_type, &options);

    // Parse source.
    // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator`
    // exist after this. Only a raw pointer to the `RawTransferData` leaves the scope.
    let data_ptr = {
        // SAFETY: We checked above that `source_len` does not exceed length of buffer
        // (`source_len <= data_offset`, and `data_offset` is less than `buffer.len()`)
        let source_text = unsafe { buffer.get_unchecked(..source_len) };
        // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
        let source_text = unsafe { str::from_utf8_unchecked(source_text) };

        let ret = parse_impl(&allocator, source_type, source_text, &options);
        let mut program = ret.program;
        // Take comments out of `program`, leaving an empty `Vec` in their place.
        // Comments are stored as a separate field of `RawTransferData` below.
        let mut comments = mem::replace(&mut program.comments, ArenaVec::new_in(&allocator));
        let mut module_record = ret.module_record;

        // Convert errors.
        // Run `SemanticBuilder` if requested.
        //
        // Note: Avoid calling `Error::from_diagnostics_in` unless there are some errors,
        // because it's fairly expensive (it copies whole of source text into a `String`).
        let mut errors = if options.show_semantic_errors == Some(true) {
            let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(&program);

            if !ret.errors.is_empty() || !semantic_ret.errors.is_empty() {
                // Parser errors first, followed by semantic errors
                Error::from_diagnostics_in(
                    ret.errors.into_iter().chain(semantic_ret.errors),
                    source_text,
                    filename,
                    &allocator,
                )
            } else {
                ArenaVec::new_in(&allocator)
            }
        } else if !ret.errors.is_empty() {
            Error::from_diagnostics_in(ret.errors, source_text, filename, &allocator)
        } else {
            ArenaVec::new_in(&allocator)
        };

        // Convert spans to UTF-16
        let span_converter = Utf8ToUtf16::new(source_text);
        span_converter.convert_program(&mut program);
        span_converter.convert_comments(&mut comments);
        span_converter.convert_module_record(&mut module_record);
        // Spans of error labels are converted only if `converter()` returns a converter.
        // NOTE(review): presumably `None` means no conversion is needed - confirm.
        if let Some(mut converter) = span_converter.converter() {
            for error in &mut errors {
                for label in &mut error.labels {
                    converter.convert_span(&mut label.span);
                }
            }
        }

        // Convert module record
        let module = EcmaScriptModule::from_in(module_record, &allocator);

        // Write `RawTransferData` to arena, and return pointer to it
        let data = RawTransferData { program, comments, module, errors };
        let data = allocator.alloc(data);
        ptr::from_ref(data).cast::<u8>()
    };

    // Write metadata into end of buffer.
    // Casting the pointer to `u32` keeps only its lower 32 bits. Buffer is aligned on
    // `BUFFER_ALIGN` (asserted above), and per the explanation at top of this file,
    // the lower 32 bits of a pointer into the buffer are its offset within the buffer.
    // 2nd argument records whether the AST type is TypeScript.
    #[allow(clippy::cast_possible_truncation)]
    let metadata = RawTransferMetadata::new(data_ptr as u32, ast_type == AstType::TypeScript);
    const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
    const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
    // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
    // `RAW_METADATA_OFFSET` is aligned on 16.
    #[expect(clippy::cast_ptr_alignment)]
    unsafe {
        buffer_ptr.add(RAW_METADATA_OFFSET).cast::<RawTransferMetadata>().write(metadata);
    }
}