oxc_parser_napi/raw_transfer.rs
1use std::{
2 mem::{self, ManuallyDrop},
3 ptr::{self, NonNull},
4 str,
5};
6
7use napi::{
8 Task,
9 bindgen_prelude::{AsyncTask, Uint8Array},
10};
11use napi_derive::napi;
12
13use oxc::{
14 allocator::{Allocator, FromIn, Vec as ArenaVec},
15 ast_visit::utf8_to_utf16::Utf8ToUtf16,
16 semantic::SemanticBuilder,
17};
18use oxc_napi::get_source_type;
19
20use crate::{
21 AstType, ParserOptions, get_ast_type, parse,
22 raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE},
23 raw_transfer_types::{EcmaScriptModule, Error, RawTransferData, RawTransferMetadata},
24};
25
// For raw transfer, use a buffer 2 GiB in size, with 4 GiB alignment.
// This ensures that all 64-bit pointers into the buffer have the same value in their upper 32 bits,
// so JS only needs to read the lower 32 bits of a pointer to get an offset into the buffer.
//
// The buffer size is only 2 GiB so that 32-bit offsets never have the highest bit set.
// This is advantageous for 2 reasons:
//
// 1. V8 stores small integers ("SMI"s) inline, rather than on the heap, which is more performant.
//    But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
//    So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
// 2. JS bitwise operators work only on signed 32-bit integers, with the 32nd bit as the sign bit.
//    So avoiding the 32nd bit being set enables using the `>>` bitshift operator, which may be
//    cheaper than `>>>`, without offsets being interpreted as negative.
39
/// Alignment (in bytes) required for the start and size of the memory region handed to
/// the arena allocator, and for the metadata block at the end of the buffer.
const BUMP_ALIGN: usize = 16;
41
42/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
43///
44/// Does not check that the offset is within bounds of `buffer`.
45/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
46#[napi(skip_typescript)]
47pub fn get_buffer_offset(buffer: Uint8Array) -> u32 {
48 let buffer = &*buffer;
49 let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN;
50 #[expect(clippy::cast_possible_truncation)]
51 return offset as u32;
52}
53
54/// Parse AST into provided `Uint8Array` buffer, synchronously.
55///
56/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
57/// provided as `source_len`.
58///
59/// This function will parse the source, and write the AST into the buffer, starting at the end.
60///
61/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
62///
63/// Caller can deserialize data from the buffer on JS side.
64///
65/// # SAFETY
66///
67/// Caller must ensure:
68/// * Source text is written into start of the buffer.
69/// * Source text's UTF-8 byte length is `source_len`.
70/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
71///
72/// If source text is originally a JS string on JS side, and converted to a buffer with
73/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
74///
75/// # Panics
76///
77/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
78#[napi(skip_typescript)]
79pub unsafe fn parse_sync_raw(
80 filename: String,
81 mut buffer: Uint8Array,
82 source_len: u32,
83 options: Option<ParserOptions>,
84) {
85 // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function
86 // during the time this `&mut [u8]` exists
87 let buffer = unsafe { buffer.as_mut() };
88
89 // SAFETY: `parse_raw_impl` has same safety requirements as this function
90 unsafe { parse_raw_impl(&filename, buffer, source_len, options) };
91}
92
93/// Parse AST into provided `Uint8Array` buffer, asynchronously.
94///
95/// Note: This function can be slower than `parseSyncRaw` due to the overhead of spawning a thread.
96///
97/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
98/// provided as `source_len`.
99///
100/// This function will parse the source, and write the AST into the buffer, starting at the end.
101///
102/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
103///
104/// Caller can deserialize data from the buffer on JS side.
105///
106/// # SAFETY
107///
108/// Caller must ensure:
109/// * Source text is written into start of the buffer.
110/// * Source text's UTF-8 byte length is `source_len`.
111/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
112/// * Contents of buffer must not be mutated by caller until the `AsyncTask` returned by this
113/// function resolves.
114///
115/// If source text is originally a JS string on JS side, and converted to a buffer with
116/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
117///
118/// # Panics
119///
120/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
121#[napi(skip_typescript)]
122pub fn parse_async_raw(
123 filename: String,
124 buffer: Uint8Array,
125 source_len: u32,
126 options: Option<ParserOptions>,
127) -> AsyncTask<ResolveTask> {
128 AsyncTask::new(ResolveTask { filename, buffer, source_len, options })
129}
130
131pub struct ResolveTask {
132 filename: String,
133 buffer: Uint8Array,
134 source_len: u32,
135 options: Option<ParserOptions>,
136}
137
138#[napi]
139impl Task for ResolveTask {
140 type JsValue = ();
141 type Output = ();
142
143 fn compute(&mut self) -> napi::Result<()> {
144 // SAFETY: Caller of `parse_async` guarantees not to mutate the contents of buffer
145 // between calling `parse_async` and the `AsyncTask` it returns resolving.
146 // Therefore, this is a valid exclusive `&mut [u8]`.
147 let buffer = unsafe { self.buffer.as_mut() };
148 // SAFETY: Caller of `parse_async` guarantees to uphold invariants of `parse_raw_impl`
149 unsafe { parse_raw_impl(&self.filename, buffer, self.source_len, self.options.take()) };
150 Ok(())
151 }
152
153 fn resolve(&mut self, _: napi::Env, _result: ()) -> napi::Result<()> {
154 Ok(())
155 }
156}
157
158/// Parse AST into buffer.
159///
160/// # SAFETY
161///
162/// Caller must ensure:
163/// * Source text is written into start of the buffer.
164/// * Source text's UTF-8 byte length is `source_len`.
165/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
166///
167/// If source text is originally a JS string on JS side, and converted to a buffer with
168/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
169#[allow(clippy::items_after_statements, clippy::allow_attributes)]
170unsafe fn parse_raw_impl(
171 filename: &str,
172 buffer: &mut [u8],
173 source_len: u32,
174 options: Option<ParserOptions>,
175) {
176 // Check buffer has expected size and alignment
177 assert_eq!(buffer.len(), BUFFER_SIZE);
178 let buffer_ptr = ptr::from_mut(buffer).cast::<u8>();
179 assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN));
180
181 // Get offsets and size of data region to be managed by arena allocator.
182 // Leave space for source before it, and space for metadata after it.
183 // Metadata actually only takes 5 bytes, but round everything up to multiple of 16,
184 // as `bumpalo` requires that alignment.
185 const RAW_METADATA_SIZE: usize = size_of::<RawTransferMetadata>();
186 const {
187 assert!(RAW_METADATA_SIZE >= BUMP_ALIGN);
188 assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN));
189 };
190 let source_len = source_len as usize;
191 let data_offset = source_len.next_multiple_of(BUMP_ALIGN);
192 let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset);
193 assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long");
194
195 // Create `Allocator`.
196 // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic.
197 // SAFETY: `data_offset` is less than `buffer.len()`, so `.add(data_offset)` cannot wrap
198 // or be out of bounds.
199 let data_ptr = unsafe { buffer_ptr.add(data_offset) };
200 debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN));
201 debug_assert!(data_size.is_multiple_of(BUMP_ALIGN));
202 // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`.
203 // `data_ptr` and `data_size` are multiples of 16.
204 // `data_size` is greater than `Allocator::MIN_SIZE`.
205 let allocator =
206 unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) };
207 let allocator = ManuallyDrop::new(allocator);
208
209 // Parse source.
210 // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator`
211 // exist after this.
212 let options = options.unwrap_or_default();
213 let source_type =
214 get_source_type(filename, options.lang.as_deref(), options.source_type.as_deref());
215 let ast_type = get_ast_type(source_type, &options);
216
217 let data_ptr = {
218 // SAFETY: We checked above that `source_len` does not exceed length of buffer
219 let source_text = unsafe { buffer.get_unchecked(..source_len) };
220 // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
221 let source_text = unsafe { str::from_utf8_unchecked(source_text) };
222
223 let ret = parse(&allocator, source_type, source_text, &options);
224 let mut program = ret.program;
225 let mut comments = mem::replace(&mut program.comments, ArenaVec::new_in(&allocator));
226 let mut module_record = ret.module_record;
227
228 // Convert errors.
229 // Run `SemanticBuilder` if requested.
230 //
231 // Note: Avoid calling `Error::from_diagnostics_in` unless there are some errors,
232 // because it's fairly expensive (it copies whole of source text into a `String`).
233 let mut errors = if options.show_semantic_errors == Some(true) {
234 let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(&program);
235
236 if !ret.errors.is_empty() || !semantic_ret.errors.is_empty() {
237 Error::from_diagnostics_in(
238 ret.errors.into_iter().chain(semantic_ret.errors),
239 source_text,
240 filename,
241 &allocator,
242 )
243 } else {
244 ArenaVec::new_in(&allocator)
245 }
246 } else if !ret.errors.is_empty() {
247 Error::from_diagnostics_in(ret.errors, source_text, filename, &allocator)
248 } else {
249 ArenaVec::new_in(&allocator)
250 };
251
252 // Convert spans to UTF-16
253 let span_converter = Utf8ToUtf16::new(source_text);
254 span_converter.convert_program(&mut program);
255 span_converter.convert_comments(&mut comments);
256 span_converter.convert_module_record(&mut module_record);
257 if let Some(mut converter) = span_converter.converter() {
258 for error in &mut errors {
259 for label in &mut error.labels {
260 converter.convert_span(&mut label.span);
261 }
262 }
263 }
264
265 // Convert module record
266 let module = EcmaScriptModule::from_in(module_record, &allocator);
267
268 // Write `RawTransferData` to arena, and return pointer to it
269 let data = RawTransferData { program, comments, module, errors };
270 let data = allocator.alloc(data);
271 ptr::from_ref(data).cast::<u8>()
272 };
273
274 // Write metadata into end of buffer
275 #[allow(clippy::cast_possible_truncation)]
276 let metadata = RawTransferMetadata::new(data_ptr as u32, ast_type == AstType::TypeScript);
277 const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
278 const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
279 // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
280 // `RAW_METADATA_OFFSET` is aligned on 16.
281 #[expect(clippy::cast_ptr_alignment)]
282 unsafe {
283 buffer_ptr.add(RAW_METADATA_OFFSET).cast::<RawTransferMetadata>().write(metadata);
284 }
285}
286
287/// Returns `true` if raw transfer is supported on this platform.
288//
289// This module is only compiled on 64-bit little-endian platforms.
290// Fallback version for unsupported platforms in `lib.rs`.
291#[napi]
292pub fn raw_transfer_supported() -> bool {
293 true
294}