oxc_parser_napi/
raw_transfer.rs

1use std::{
2    mem::{self, ManuallyDrop},
3    ptr::{self, NonNull},
4    str,
5};
6
7use napi::{
8    Task,
9    bindgen_prelude::{AsyncTask, Uint8Array},
10};
11use napi_derive::napi;
12
13use oxc::{
14    allocator::{Allocator, FromIn, Vec as ArenaVec},
15    ast_visit::utf8_to_utf16::Utf8ToUtf16,
16    semantic::SemanticBuilder,
17};
18use oxc_napi::get_source_type;
19
20use crate::{
21    AstType, ParserOptions, get_ast_type, parse,
22    raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE},
23    raw_transfer_types::{EcmaScriptModule, Error, RawTransferData, RawTransferMetadata},
24};
25
26// For raw transfer, use a buffer 2 GiB in size, with 4 GiB alignment.
27// This ensures that all 64-bit pointers have the same value in upper 32 bits,
28// so JS only needs to read the lower 32 bits to get an offset into the buffer.
29//
30// Buffer size only 2 GiB so 32-bit offsets don't have the highest bit set.
31// This is advantageous for 2 reasons:
32//
33// 1. V8 stores small integers ("SMI"s) inline, rather than on heap, which is more performant.
34//    But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
35//    So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
36// 2. JS bitwise operators work only on signed 32-bit integers, with 32nd bit as sign bit.
37//    So avoiding the 32nd bit being set enables using `>>` bitshift operator, which may be cheaper
38//    than `>>>`, without offsets being interpreted as negative.
39
/// Alignment (in bytes) used when laying out the buffer for the arena allocator.
///
/// The start and size of the region handed to the arena, and the size and position
/// of the metadata at the end of the buffer, are all required to be multiples of this value.
const BUMP_ALIGN: usize = 16;
41
42/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
43///
44/// Does not check that the offset is within bounds of `buffer`.
45/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
46#[napi(skip_typescript)]
47pub fn get_buffer_offset(buffer: Uint8Array) -> u32 {
48    let buffer = &*buffer;
49    let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN;
50    #[expect(clippy::cast_possible_truncation)]
51    return offset as u32;
52}
53
54/// Parse AST into provided `Uint8Array` buffer, synchronously.
55///
56/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
57/// provided as `source_len`.
58///
59/// This function will parse the source, and write the AST into the buffer, starting at the end.
60///
61/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
62///
63/// Caller can deserialize data from the buffer on JS side.
64///
65/// # SAFETY
66///
67/// Caller must ensure:
68/// * Source text is written into start of the buffer.
69/// * Source text's UTF-8 byte length is `source_len`.
70/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
71///
72/// If source text is originally a JS string on JS side, and converted to a buffer with
73/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
74///
75/// # Panics
76///
77/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
78#[napi(skip_typescript)]
79pub unsafe fn parse_sync_raw(
80    filename: String,
81    mut buffer: Uint8Array,
82    source_len: u32,
83    options: Option<ParserOptions>,
84) {
85    // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function
86    // during the time this `&mut [u8]` exists
87    let buffer = unsafe { buffer.as_mut() };
88
89    // SAFETY: `parse_raw_impl` has same safety requirements as this function
90    unsafe { parse_raw_impl(&filename, buffer, source_len, options) };
91}
92
93/// Parse AST into provided `Uint8Array` buffer, asynchronously.
94///
95/// Note: This function can be slower than `parseSyncRaw` due to the overhead of spawning a thread.
96///
97/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
98/// provided as `source_len`.
99///
100/// This function will parse the source, and write the AST into the buffer, starting at the end.
101///
102/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
103///
104/// Caller can deserialize data from the buffer on JS side.
105///
106/// # SAFETY
107///
108/// Caller must ensure:
109/// * Source text is written into start of the buffer.
110/// * Source text's UTF-8 byte length is `source_len`.
111/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
112/// * Contents of buffer must not be mutated by caller until the `AsyncTask` returned by this
113///   function resolves.
114///
115/// If source text is originally a JS string on JS side, and converted to a buffer with
116/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
117///
118/// # Panics
119///
120/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
121#[napi(skip_typescript)]
122pub fn parse_async_raw(
123    filename: String,
124    buffer: Uint8Array,
125    source_len: u32,
126    options: Option<ParserOptions>,
127) -> AsyncTask<ResolveTask> {
128    AsyncTask::new(ResolveTask { filename, buffer, source_len, options })
129}
130
131pub struct ResolveTask {
132    filename: String,
133    buffer: Uint8Array,
134    source_len: u32,
135    options: Option<ParserOptions>,
136}
137
138#[napi]
139impl Task for ResolveTask {
140    type JsValue = ();
141    type Output = ();
142
143    fn compute(&mut self) -> napi::Result<()> {
144        // SAFETY: Caller of `parse_async` guarantees not to mutate the contents of buffer
145        // between calling `parse_async` and the `AsyncTask` it returns resolving.
146        // Therefore, this is a valid exclusive `&mut [u8]`.
147        let buffer = unsafe { self.buffer.as_mut() };
148        // SAFETY: Caller of `parse_async` guarantees to uphold invariants of `parse_raw_impl`
149        unsafe { parse_raw_impl(&self.filename, buffer, self.source_len, self.options.take()) };
150        Ok(())
151    }
152
153    fn resolve(&mut self, _: napi::Env, _result: ()) -> napi::Result<()> {
154        Ok(())
155    }
156}
157
158/// Parse AST into buffer.
159///
160/// # SAFETY
161///
162/// Caller must ensure:
163/// * Source text is written into start of the buffer.
164/// * Source text's UTF-8 byte length is `source_len`.
165/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
166///
167/// If source text is originally a JS string on JS side, and converted to a buffer with
168/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
169#[allow(clippy::items_after_statements, clippy::allow_attributes)]
170unsafe fn parse_raw_impl(
171    filename: &str,
172    buffer: &mut [u8],
173    source_len: u32,
174    options: Option<ParserOptions>,
175) {
176    // Check buffer has expected size and alignment
177    assert_eq!(buffer.len(), BUFFER_SIZE);
178    let buffer_ptr = ptr::from_mut(buffer).cast::<u8>();
179    assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN));
180
181    // Get offsets and size of data region to be managed by arena allocator.
182    // Leave space for source before it, and space for metadata after it.
183    // Metadata actually only takes 5 bytes, but round everything up to multiple of 16,
184    // as `bumpalo` requires that alignment.
185    const RAW_METADATA_SIZE: usize = size_of::<RawTransferMetadata>();
186    const {
187        assert!(RAW_METADATA_SIZE >= BUMP_ALIGN);
188        assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN));
189    };
190    let source_len = source_len as usize;
191    let data_offset = source_len.next_multiple_of(BUMP_ALIGN);
192    let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset);
193    assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long");
194
195    // Create `Allocator`.
196    // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic.
197    // SAFETY: `data_offset` is less than `buffer.len()`, so `.add(data_offset)` cannot wrap
198    // or be out of bounds.
199    let data_ptr = unsafe { buffer_ptr.add(data_offset) };
200    debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN));
201    debug_assert!(data_size.is_multiple_of(BUMP_ALIGN));
202    // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`.
203    // `data_ptr` and `data_size` are multiples of 16.
204    // `data_size` is greater than `Allocator::MIN_SIZE`.
205    let allocator =
206        unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) };
207    let allocator = ManuallyDrop::new(allocator);
208
209    // Parse source.
210    // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator`
211    // exist after this.
212    let options = options.unwrap_or_default();
213    let source_type =
214        get_source_type(filename, options.lang.as_deref(), options.source_type.as_deref());
215    let ast_type = get_ast_type(source_type, &options);
216
217    let data_ptr = {
218        // SAFETY: We checked above that `source_len` does not exceed length of buffer
219        let source_text = unsafe { buffer.get_unchecked(..source_len) };
220        // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
221        let source_text = unsafe { str::from_utf8_unchecked(source_text) };
222
223        let ret = parse(&allocator, source_type, source_text, &options);
224        let mut program = ret.program;
225        let mut comments = mem::replace(&mut program.comments, ArenaVec::new_in(&allocator));
226        let mut module_record = ret.module_record;
227
228        // Convert errors.
229        // Run `SemanticBuilder` if requested.
230        //
231        // Note: Avoid calling `Error::from_diagnostics_in` unless there are some errors,
232        // because it's fairly expensive (it copies whole of source text into a `String`).
233        let mut errors = if options.show_semantic_errors == Some(true) {
234            let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(&program);
235
236            if !ret.errors.is_empty() || !semantic_ret.errors.is_empty() {
237                Error::from_diagnostics_in(
238                    ret.errors.into_iter().chain(semantic_ret.errors),
239                    source_text,
240                    filename,
241                    &allocator,
242                )
243            } else {
244                ArenaVec::new_in(&allocator)
245            }
246        } else if !ret.errors.is_empty() {
247            Error::from_diagnostics_in(ret.errors, source_text, filename, &allocator)
248        } else {
249            ArenaVec::new_in(&allocator)
250        };
251
252        // Convert spans to UTF-16
253        let span_converter = Utf8ToUtf16::new(source_text);
254        span_converter.convert_program(&mut program);
255        span_converter.convert_comments(&mut comments);
256        span_converter.convert_module_record(&mut module_record);
257        if let Some(mut converter) = span_converter.converter() {
258            for error in &mut errors {
259                for label in &mut error.labels {
260                    converter.convert_span(&mut label.span);
261                }
262            }
263        }
264
265        // Convert module record
266        let module = EcmaScriptModule::from_in(module_record, &allocator);
267
268        // Write `RawTransferData` to arena, and return pointer to it
269        let data = RawTransferData { program, comments, module, errors };
270        let data = allocator.alloc(data);
271        ptr::from_ref(data).cast::<u8>()
272    };
273
274    // Write metadata into end of buffer
275    #[allow(clippy::cast_possible_truncation)]
276    let metadata = RawTransferMetadata::new(data_ptr as u32, ast_type == AstType::TypeScript);
277    const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
278    const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
279    // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
280    // `RAW_METADATA_OFFSET` is aligned on 16.
281    #[expect(clippy::cast_ptr_alignment)]
282    unsafe {
283        buffer_ptr.add(RAW_METADATA_OFFSET).cast::<RawTransferMetadata>().write(metadata);
284    }
285}
286
287/// Returns `true` if raw transfer is supported on this platform.
288//
289// This module is only compiled on 64-bit little-endian platforms.
290// Fallback version for unsupported platforms in `lib.rs`.
291#[napi]
292pub fn raw_transfer_supported() -> bool {
293    true
294}