oxc_parser_napi/raw_transfer.rs
1use std::{
2 mem::{self, ManuallyDrop},
3 ptr::{self, NonNull},
4 str,
5};
6
7use napi::{
8 Task,
9 bindgen_prelude::{AsyncTask, Uint8Array},
10};
11use napi_derive::napi;
12
13use oxc::{
14 allocator::{Allocator, FromIn, Vec as ArenaVec},
15 ast_visit::utf8_to_utf16::Utf8ToUtf16,
16 semantic::SemanticBuilder,
17};
18use oxc_napi::get_source_type;
19
20use crate::{
21 AstType, ParserOptions, get_ast_type, parse_impl,
22 raw_transfer_constants::{BLOCK_ALIGN as BUFFER_ALIGN, BUFFER_SIZE},
23 raw_transfer_types::{EcmaScriptModule, Error, RawTransferData, RawTransferMetadata},
24};
25
26// For raw transfer, use a buffer 2 GiB in size, with 4 GiB alignment.
27// This ensures that all 64-bit pointers have the same value in upper 32 bits,
28// so JS only needs to read the lower 32 bits to get an offset into the buffer.
29//
30// Buffer size only 2 GiB so 32-bit offsets don't have the highest bit set.
31// This is advantageous for 2 reasons:
32//
33// 1. V8 stores small integers ("SMI"s) inline, rather than on heap, which is more performant.
34// But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
35// So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
36// 2. JS bitwise operators work only on signed 32-bit integers, with 32nd bit as sign bit.
37// So avoiding the 32nd bit being set enables using `>>` bitshift operator, which may be cheaper
38// than `>>>`, without offsets being interpreted as negative.
39
/// Alignment (in bytes) to which the arena region carved out of the transfer buffer is rounded.
///
/// The start offset of the arena data region, and the size of the trailing metadata block,
/// are both kept to multiples of this value.
const BUMP_ALIGN: usize = 16;
41
42/// Get offset within a `Uint8Array` which is aligned on `BUFFER_ALIGN`.
43///
44/// Does not check that the offset is within bounds of `buffer`.
45/// To ensure it always is, provide a `Uint8Array` of at least `BUFFER_SIZE + BUFFER_ALIGN` bytes.
46#[napi(skip_typescript)]
47#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
48pub fn get_buffer_offset(buffer: Uint8Array) -> u32 {
49 let buffer = &*buffer;
50 let offset = (BUFFER_ALIGN - (buffer.as_ptr() as usize % BUFFER_ALIGN)) % BUFFER_ALIGN;
51 #[expect(clippy::cast_possible_truncation)]
52 return offset as u32;
53}
54
55/// Parse AST into provided `Uint8Array` buffer, synchronously.
56///
57/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
58/// provided as `source_len`.
59///
60/// This function will parse the source, and write the AST into the buffer, starting at the end.
61///
62/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
63///
64/// Caller can deserialize data from the buffer on JS side.
65///
66/// # SAFETY
67///
68/// Caller must ensure:
69/// * Source text is written into start of the buffer.
70/// * Source text's UTF-8 byte length is `source_len`.
71/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
72///
73/// If source text is originally a JS string on JS side, and converted to a buffer with
74/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
75///
76/// # Panics
77///
78/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
79#[napi(skip_typescript)]
80#[allow(clippy::needless_pass_by_value, clippy::allow_attributes)]
81pub unsafe fn parse_raw_sync(
82 filename: String,
83 mut buffer: Uint8Array,
84 source_len: u32,
85 options: Option<ParserOptions>,
86) {
87 // SAFETY: This function is called synchronously, so buffer cannot be mutated outside this function
88 // during the time this `&mut [u8]` exists
89 let buffer = unsafe { buffer.as_mut() };
90
91 // SAFETY: `parse_raw_impl` has same safety requirements as this function
92 unsafe { parse_raw_impl(&filename, buffer, source_len, options) };
93}
94
95/// Parse AST into provided `Uint8Array` buffer, asynchronously.
96///
97/// Note: This function can be slower than `parseRawSync` due to the overhead of spawning a thread.
98///
99/// Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
100/// provided as `source_len`.
101///
102/// This function will parse the source, and write the AST into the buffer, starting at the end.
103///
104/// It also writes to the very end of the buffer the offset of `Program` within the buffer.
105///
106/// Caller can deserialize data from the buffer on JS side.
107///
108/// # SAFETY
109///
110/// Caller must ensure:
111/// * Source text is written into start of the buffer.
112/// * Source text's UTF-8 byte length is `source_len`.
113/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
114/// * Contents of buffer must not be mutated by caller until the `AsyncTask` returned by this
115/// function resolves.
116///
117/// If source text is originally a JS string on JS side, and converted to a buffer with
118/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
119///
120/// # Panics
121///
122/// Panics if source text is too long, or AST takes more memory than is available in the buffer.
123#[napi(skip_typescript)]
124pub fn parse_raw(
125 filename: String,
126 buffer: Uint8Array,
127 source_len: u32,
128 options: Option<ParserOptions>,
129) -> AsyncTask<ResolveTask> {
130 AsyncTask::new(ResolveTask { filename, buffer, source_len, options })
131}
132
133pub struct ResolveTask {
134 filename: String,
135 buffer: Uint8Array,
136 source_len: u32,
137 options: Option<ParserOptions>,
138}
139
140#[napi]
141impl Task for ResolveTask {
142 type JsValue = ();
143 type Output = ();
144
145 fn compute(&mut self) -> napi::Result<()> {
146 // SAFETY: Caller of `parse_async` guarantees not to mutate the contents of buffer
147 // between calling `parse_async` and the `AsyncTask` it returns resolving.
148 // Therefore, this is a valid exclusive `&mut [u8]`.
149 let buffer = unsafe { self.buffer.as_mut() };
150 // SAFETY: Caller of `parse_async` guarantees to uphold invariants of `parse_raw_impl`
151 unsafe { parse_raw_impl(&self.filename, buffer, self.source_len, self.options.take()) };
152 Ok(())
153 }
154
155 fn resolve(&mut self, _: napi::Env, _result: ()) -> napi::Result<()> {
156 Ok(())
157 }
158}
159
160/// Parse AST into buffer.
161///
162/// # SAFETY
163///
164/// Caller must ensure:
165/// * Source text is written into start of the buffer.
166/// * Source text's UTF-8 byte length is `source_len`.
167/// * The 1st `source_len` bytes of the buffer comprises a valid UTF-8 string.
168///
169/// If source text is originally a JS string on JS side, and converted to a buffer with
170/// `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
171#[allow(clippy::items_after_statements, clippy::allow_attributes)]
172unsafe fn parse_raw_impl(
173 filename: &str,
174 buffer: &mut [u8],
175 source_len: u32,
176 options: Option<ParserOptions>,
177) {
178 // Check buffer has expected size and alignment
179 assert_eq!(buffer.len(), BUFFER_SIZE);
180 let buffer_ptr = ptr::from_mut(buffer).cast::<u8>();
181 assert!((buffer_ptr as usize).is_multiple_of(BUFFER_ALIGN));
182
183 // Get offsets and size of data region to be managed by arena allocator.
184 // Leave space for source before it, and space for metadata after it.
185 // Metadata actually only takes 5 bytes, but round everything up to multiple of 16,
186 // as the arena allocator requires that alignment.
187 const RAW_METADATA_SIZE: usize = size_of::<RawTransferMetadata>();
188 const {
189 assert!(RAW_METADATA_SIZE >= BUMP_ALIGN);
190 assert!(RAW_METADATA_SIZE.is_multiple_of(BUMP_ALIGN));
191 };
192 let source_len = source_len as usize;
193 let data_offset = source_len.next_multiple_of(BUMP_ALIGN);
194 let data_size = (BUFFER_SIZE - RAW_METADATA_SIZE).saturating_sub(data_offset);
195 assert!(data_size >= Allocator::RAW_MIN_SIZE, "Source text is too long");
196
197 // Create `Allocator`.
198 // Wrap in `ManuallyDrop` so the allocation doesn't get freed at end of function, or if panic.
199 // SAFETY: `data_offset` is less than `buffer.len()`, so `.add(data_offset)` cannot wrap
200 // or be out of bounds.
201 let data_ptr = unsafe { buffer_ptr.add(data_offset) };
202 debug_assert!((data_ptr as usize).is_multiple_of(BUMP_ALIGN));
203 debug_assert!(data_size.is_multiple_of(BUMP_ALIGN));
204 // SAFETY: `data_ptr` and `data_size` outline a section of the memory in `buffer`.
205 // `data_ptr` and `data_size` are multiples of 16.
206 // `data_size` is greater than `Allocator::MIN_SIZE`.
207 let allocator =
208 unsafe { Allocator::from_raw_parts(NonNull::new_unchecked(data_ptr), data_size) };
209 let allocator = ManuallyDrop::new(allocator);
210
211 // Parse source.
212 // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator`
213 // exist after this.
214 let options = options.unwrap_or_default();
215 let source_type =
216 get_source_type(filename, options.lang.as_deref(), options.source_type.as_deref());
217 let ast_type = get_ast_type(source_type, &options);
218
219 let data_ptr = {
220 // SAFETY: We checked above that `source_len` does not exceed length of buffer
221 let source_text = unsafe { buffer.get_unchecked(..source_len) };
222 // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
223 let source_text = unsafe { str::from_utf8_unchecked(source_text) };
224
225 let ret = parse_impl(&allocator, source_type, source_text, &options);
226 let mut program = ret.program;
227 let mut comments = mem::replace(&mut program.comments, ArenaVec::new_in(&allocator));
228 let mut module_record = ret.module_record;
229
230 // Convert errors.
231 // Run `SemanticBuilder` if requested.
232 //
233 // Note: Avoid calling `Error::from_diagnostics_in` unless there are some errors,
234 // because it's fairly expensive (it copies whole of source text into a `String`).
235 let mut errors = if options.show_semantic_errors == Some(true) {
236 let semantic_ret = SemanticBuilder::new().with_check_syntax_error(true).build(&program);
237
238 if !ret.errors.is_empty() || !semantic_ret.errors.is_empty() {
239 Error::from_diagnostics_in(
240 ret.errors.into_iter().chain(semantic_ret.errors),
241 source_text,
242 filename,
243 &allocator,
244 )
245 } else {
246 ArenaVec::new_in(&allocator)
247 }
248 } else if !ret.errors.is_empty() {
249 Error::from_diagnostics_in(ret.errors, source_text, filename, &allocator)
250 } else {
251 ArenaVec::new_in(&allocator)
252 };
253
254 // Convert spans to UTF-16
255 let span_converter = Utf8ToUtf16::new(source_text);
256 span_converter.convert_program(&mut program);
257 span_converter.convert_comments(&mut comments);
258 span_converter.convert_module_record(&mut module_record);
259 if let Some(mut converter) = span_converter.converter() {
260 for error in &mut errors {
261 for label in &mut error.labels {
262 converter.convert_span(&mut label.span);
263 }
264 }
265 }
266
267 // Convert module record
268 let module = EcmaScriptModule::from_in(module_record, &allocator);
269
270 // Write `RawTransferData` to arena, and return pointer to it
271 let data = RawTransferData { program, comments, module, errors };
272 let data = allocator.alloc(data);
273 ptr::from_ref(data).cast::<u8>()
274 };
275
276 // Write metadata into end of buffer
277 #[allow(clippy::cast_possible_truncation)]
278 let metadata = RawTransferMetadata::new(data_ptr as u32, ast_type == AstType::TypeScript);
279 const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
280 const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
281 // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
282 // `RAW_METADATA_OFFSET` is aligned on 16.
283 #[expect(clippy::cast_ptr_alignment)]
284 unsafe {
285 buffer_ptr.add(RAW_METADATA_OFFSET).cast::<RawTransferMetadata>().write(metadata);
286 }
287}