Skip to main content

wasm_rquickjs/
inject.rs

1use anyhow::{Context, anyhow};
2use camino::Utf8Path;
3use wasm_encoder::reencode::{Error, Reencode, ReencodeComponent};
4
/// Leading signature of a wasm-rquickjs JS injection marker (16 bytes).
pub const SLOT_MAGIC: &[u8; 16] = b"WASM_RQJS_SLOT\x01\x00";

/// Trailing signature of a marker (16 bytes); checked together with
/// [`SLOT_MAGIC`] to validate that a candidate byte range really is a marker.
pub const SLOT_END_MAGIC: &[u8; 16] = b"WASM_RQJS_SLTND\x00";

/// Size of a complete marker in bytes:
/// MAGIC(16) + MODULE_INDEX(4) + JS_OFFSET(4) + END_MAGIC(16).
///
/// * MODULE_INDEX selects the JS module slot (0 = primary, 1+ = additional).
/// * JS_OFFSET is a pointer into linear memory; 0 means "no JS injected".
const MARKER_SIZE: usize = 16 + 4 + 4 + 16;

/// Size of one WebAssembly linear-memory page in bytes (64 KiB).
const WASM_PAGE_SIZE: u32 = 64 * 1024;
17
18/// Creates a 40-byte marker file. Layout:
19///
20/// ```text
21/// [MAGIC 16 bytes][MODULE_INDEX u32 LE][JS_OFFSET u32 LE = 0][END_MAGIC 16 bytes]
22/// ```
23///
24/// MODULE_INDEX identifies which JS module this slot is for (0 = primary export module,
25/// 1+ = additional modules in order).
26/// JS_OFFSET=0 indicates no JS has been injected. After injection, JS_OFFSET
27/// points to a memory location containing `[JS_LEN u32 LE][JS bytes]`.
28pub fn create_marker_file(module_index: u32) -> Vec<u8> {
29    let mut data = Vec::with_capacity(MARKER_SIZE);
30    data.extend_from_slice(SLOT_MAGIC);
31    data.extend_from_slice(&module_index.to_le_bytes());
32    data.extend_from_slice(&0u32.to_le_bytes()); // js_offset = 0 (not injected)
33    data.extend_from_slice(SLOT_END_MAGIC);
34    data
35}
36
37/// Injects JavaScript source code into a compiled WASM component that was built with
38/// `EmbeddingMode::BinarySlot`.
39///
40/// This structurally rewrites the WASM component using `wasmparser` + `wasm-encoder`:
41/// 1. Finds data segments containing 40-byte markers and records their module indices.
42/// 2. Grows the core module's memory to fit all new JS data segments.
43/// 3. Adds new active data segments at the end of memory containing the JS for each module.
44/// 4. Updates the DataCount section to account for the extra segments.
45///
46/// There is no capacity limit — each JS source can be any size.
47///
48/// The `js_sources` slice maps by position to marker MODULE_INDEX: the first entry
49/// is injected into the marker with MODULE_INDEX=0, the second into MODULE_INDEX=1, etc.
50pub fn inject_js_into_component(
51    input: &Utf8Path,
52    output: &Utf8Path,
53    js_sources: &[&str],
54) -> anyhow::Result<()> {
55    let wasm_bytes = std::fs::read(input.as_std_path())
56        .with_context(|| format!("Failed to read input component: {input}"))?;
57
58    let patched = inject_js_into_bytes(&wasm_bytes, js_sources)?;
59
60    std::fs::write(output.as_std_path(), &patched)
61        .with_context(|| format!("Failed to write output component: {output}"))?;
62
63    Ok(())
64}
65
66/// Injects JavaScript sources into WASM component bytes, returning the patched bytes.
67///
68/// Each entry in `js_sources` corresponds to a marker MODULE_INDEX (0, 1, 2, ...).
69pub fn inject_js_into_bytes(wasm_bytes: &[u8], js_sources: &[&str]) -> anyhow::Result<Vec<u8>> {
70    if js_sources.is_empty() {
71        return Err(anyhow!("No JS sources provided for injection"));
72    }
73
74    // Build JS payloads: for each source, LEN(4) + JS bytes
75    let js_payloads: Vec<Vec<u8>> = js_sources
76        .iter()
77        .map(|src| {
78            let js_bytes = src.as_bytes();
79            let mut payload = Vec::with_capacity(4 + js_bytes.len());
80            payload.extend_from_slice(&(js_bytes.len() as u32).to_le_bytes());
81            payload.extend_from_slice(js_bytes);
82            payload
83        })
84        .collect();
85
86    let total_payload_size: usize = js_payloads.iter().map(|p| p.len()).sum();
87
88    let mut rewriter = MarkerRewriter {
89        js_payloads,
90        total_payload_size,
91        markers_found: Vec::new(),
92        max_data_end: 0,
93        js_mem_offsets: Vec::new(),
94        original_memory_min: 0,
95    };
96
97    let parser = wasmparser_encoder::Parser::new(0);
98    let mut component = wasm_encoder::Component::new();
99    rewriter
100        .parse_component(&mut component, parser, wasm_bytes)
101        .map_err(|e| match e {
102            Error::UserError(e) => e,
103            Error::ParseError(e) => anyhow!("Failed to parse WASM component: {e}"),
104            other => anyhow!("Failed to reencode WASM component: {other}"),
105        })?;
106
107    if rewriter.markers_found.is_empty() {
108        return Err(anyhow!(
109            "No JS injection markers found in the WASM component. \
110             Was it compiled with EmbeddingMode::BinarySlot?"
111        ));
112    }
113
114    // Verify all expected markers were found
115    for i in 0..js_sources.len() as u32 {
116        if !rewriter.markers_found.contains(&i) {
117            return Err(anyhow!(
118                "JS injection marker with MODULE_INDEX={i} not found in the WASM component. \
119                 Expected {expected} markers but only found: {found:?}",
120                expected = js_sources.len(),
121                found = rewriter.markers_found,
122            ));
123        }
124    }
125
126    let mut output = component.finish();
127
128    // Final binary patch: set JS_OFFSET in each marker to point to its JS data.
129    patch_js_offsets_in_output(&mut output, &rewriter.js_mem_offsets)?;
130
131    Ok(output)
132}
133
134/// Returns true if `data[offset..]` starts with a valid 40-byte marker pattern:
135/// SLOT_MAGIC(16) + MODULE_INDEX(4) + JS_OFFSET(4) + SLOT_END_MAGIC(16)
136fn is_marker_at(data: &[u8], offset: usize) -> bool {
137    offset + MARKER_SIZE <= data.len()
138        && &data[offset..offset + 16] == SLOT_MAGIC
139        && &data[offset + 24..offset + MARKER_SIZE] == SLOT_END_MAGIC
140}
141
/// Decodes the little-endian MODULE_INDEX field (bytes 16..20) of the marker
/// that starts at `offset`.
///
/// Panics if `data` is too short to contain the field.
fn marker_module_index(data: &[u8], offset: usize) -> u32 {
    let field_start = offset + 16;
    let mut field = [0u8; 4];
    field.copy_from_slice(&data[field_start..field_start + 4]);
    u32::from_le_bytes(field)
}
146
/// Decodes the little-endian JS_OFFSET field (bytes 20..24) of the marker that
/// starts at `offset`.
///
/// Panics if `data` is too short to contain the field.
fn marker_js_offset(data: &[u8], offset: usize) -> u32 {
    let field_start = offset + 20;
    let mut field = [0u8; 4];
    field.copy_from_slice(&data[field_start..field_start + 4]);
    u32::from_le_bytes(field)
}
151
152/// Finds the marker's byte offset within a data segment's raw bytes.
153fn find_marker_in_data(data: &[u8]) -> Option<usize> {
154    if data.len() < MARKER_SIZE {
155        return None;
156    }
157    (0..=data.len() - MARKER_SIZE).find(|&i| is_marker_at(data, i))
158}
159
/// Re-encoding state used while injecting JS into a component.
///
/// The first two fields are inputs, the next three are filled in while the
/// memory and data sections are scanned, and `js_mem_offsets` is the result
/// consumed by the final patching pass.
struct MarkerRewriter {
    /// Payloads to inject; the entry at position `i` belongs to the marker with
    /// MODULE_INDEX=`i`.
    js_payloads: Vec<Vec<u8>>,
    /// Sum of the lengths of all entries in `js_payloads`, used when sizing memory.
    total_payload_size: usize,
    /// MODULE_INDEX of every marker seen so far in scanned data segments.
    markers_found: Vec<u32>,
    /// Highest end address of any existing active data segment on memory 0.
    max_data_end: u32,
    /// Minimum size, in pages, declared by the memory section before it is grown.
    original_memory_min: u32,
    /// `(module_index, memory_offset)` pairs recording where each injected
    /// payload was placed.
    js_mem_offsets: Vec<(u32, u32)>,
}
174
175impl Reencode for MarkerRewriter {
176    type Error = anyhow::Error;
177
178    fn parse_data(
179        &mut self,
180        data: &mut wasm_encoder::DataSection,
181        datum: wasmparser_encoder::Data<'_>,
182    ) -> Result<(), Error<Self::Error>> {
183        // Track the highest memory address used by active data segments on memory 0
184        if let wasmparser_encoder::DataKind::Active {
185            memory_index: 0,
186            offset_expr,
187        } = &datum.kind
188            && let Some(offset) = eval_const_i32(offset_expr)
189        {
190            let end = offset.saturating_add(datum.data.len() as u32);
191            self.max_data_end = self.max_data_end.max(end);
192        }
193
194        // Check if this segment contains a marker and record its module index
195        if let Some(marker_offset) = find_marker_in_data(datum.data) {
196            let module_index = marker_module_index(datum.data, marker_offset);
197            if self.markers_found.contains(&module_index) {
198                return Err(Error::UserError(anyhow!(
199                    "Found duplicate JS injection marker with MODULE_INDEX={module_index}"
200                )));
201            }
202            self.markers_found.push(module_index);
203        }
204
205        // Emit segment unchanged — the marker's JS_OFFSET stays 0 for now.
206        // We'll patch JS_OFFSET in the final output via patch_js_offsets_in_output.
207        wasm_encoder::reencode::utils::parse_data(self, data, datum)
208    }
209
210    fn parse_data_section(
211        &mut self,
212        data: &mut wasm_encoder::DataSection,
213        section: wasmparser_encoder::DataSectionReader<'_>,
214    ) -> Result<(), Error<Self::Error>> {
215        // Process all existing segments
216        wasm_encoder::reencode::utils::parse_data_section(self, data, section)?;
217
218        // For each found marker (sorted by module index), add a data segment
219        let mut current_offset = page_align(self.max_data_end);
220        let mut sorted_indices = self.markers_found.clone();
221        sorted_indices.sort();
222
223        for module_index in sorted_indices {
224            if let Some(payload) = self.js_payloads.get(module_index as usize) {
225                let offset_expr = wasm_encoder::ConstExpr::i32_const(current_offset as i32);
226                data.active(0, &offset_expr, payload.iter().copied());
227                self.js_mem_offsets.push((module_index, current_offset));
228                current_offset = page_align(current_offset + payload.len() as u32);
229            }
230        }
231
232        Ok(())
233    }
234
235    fn data_count(&mut self, count: u32) -> Result<u32, Error<Self::Error>> {
236        // Add 1 for each extra JS data segment we'll append.
237        Ok(count + self.js_payloads.len() as u32)
238    }
239
240    fn parse_memory_section(
241        &mut self,
242        memories: &mut wasm_encoder::MemorySection,
243        section: wasmparser_encoder::MemorySectionReader<'_>,
244    ) -> Result<(), Error<Self::Error>> {
245        for memory in section {
246            let memory = memory.map_err(Error::ParseError)?;
247
248            self.original_memory_min = memory.initial as u32;
249
250            // Grow memory to fit all JS payloads placed at the end.
251            // Upper bound: original memory + total payload size + page alignment padding per payload.
252            let max_padding = self.js_payloads.len() as u32 * WASM_PAGE_SIZE;
253            let js_end_upper = self.original_memory_min * WASM_PAGE_SIZE
254                + self.total_payload_size as u32
255                + max_padding;
256            let pages_needed = js_end_upper.div_ceil(WASM_PAGE_SIZE);
257            let new_min = pages_needed.max(memory.initial as u32);
258            let new_max = memory.maximum.map(|m| m.max(new_min as u64));
259
260            memories.memory(wasm_encoder::MemoryType {
261                minimum: new_min as u64,
262                maximum: new_max,
263                memory64: memory.memory64,
264                shared: memory.shared,
265                page_size_log2: memory.page_size_log2,
266            });
267        }
268        Ok(())
269    }
270}
271
272impl ReencodeComponent for MarkerRewriter {}
273
274/// Evaluate a const expression to an i32 value (handles i32.const only).
275fn eval_const_i32(expr: &wasmparser_encoder::ConstExpr<'_>) -> Option<u32> {
276    let mut reader = expr.get_operators_reader();
277    if let Ok(wasmparser_encoder::Operator::I32Const { value }) = reader.read() {
278        return Some(value as u32);
279    }
280    None
281}
282
283fn page_align(addr: u32) -> u32 {
284    (addr + WASM_PAGE_SIZE - 1) & !(WASM_PAGE_SIZE - 1)
285}
286
287/// Post-pass: find all markers in the output bytes and patch JS_OFFSET for each.
288/// `offsets` is a list of (module_index, js_mem_offset) pairs.
289fn patch_js_offsets_in_output(output: &mut [u8], offsets: &[(u32, u32)]) -> anyhow::Result<()> {
290    // Collect all marker positions with their module indices
291    let mut marker_positions: Vec<(usize, u32)> = Vec::new();
292    for i in 0..output.len().saturating_sub(MARKER_SIZE) {
293        if is_marker_at(output, i) {
294            let module_idx = marker_module_index(output, i);
295            let js_off = marker_js_offset(output, i);
296            // Only patch markers with JS_OFFSET == 0 (unpatched)
297            if js_off == 0 {
298                marker_positions.push((i, module_idx));
299            }
300        }
301    }
302
303    for &(module_index, js_mem_offset) in offsets {
304        let pos = marker_positions
305            .iter()
306            .find(|(_, idx)| *idx == module_index)
307            .map(|(pos, _)| *pos)
308            .ok_or_else(|| {
309                anyhow!(
310                    "Could not find unpatched marker with MODULE_INDEX={module_index} \
311                     in reencoded output"
312                )
313            })?;
314        // Patch JS_OFFSET at offset 20 (after MAGIC(16) + MODULE_INDEX(4))
315        output[pos + 20..pos + 24].copy_from_slice(&js_mem_offset.to_le_bytes());
316    }
317
318    Ok(())
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a little-endian u32 marker field.
    fn le_u32(field: &[u8]) -> u32 {
        u32::from_le_bytes(field.try_into().unwrap())
    }

    #[test]
    fn test_create_marker_file() {
        // Primary slot: correct framing, index 0, offset still unset.
        let primary = create_marker_file(0);
        assert_eq!(primary.len(), MARKER_SIZE);
        assert_eq!(&primary[..16], SLOT_MAGIC.as_slice());
        assert_eq!(le_u32(&primary[16..20]), 0); // module_index
        assert_eq!(le_u32(&primary[20..24]), 0); // js_offset
        assert_eq!(&primary[24..], SLOT_END_MAGIC.as_slice());

        // Additional slot: only the module index changes.
        let secondary = create_marker_file(1);
        assert_eq!(le_u32(&secondary[16..20]), 1);
        assert_eq!(le_u32(&secondary[20..24]), 0);
    }

    #[test]
    fn test_find_marker_in_data() {
        let marker = create_marker_file(0);
        assert_eq!(find_marker_in_data(&marker), Some(0));

        // Marker embedded in larger data
        let surrounded: Vec<u8> = std::iter::repeat(0xAA)
            .take(100)
            .chain(marker.iter().copied())
            .chain(std::iter::repeat(0xBB).take(50))
            .collect();
        assert_eq!(find_marker_in_data(&surrounded), Some(100));

        // No marker
        for len in [100usize, 10] {
            assert_eq!(find_marker_in_data(&vec![0u8; len]), None);
        }
    }

    #[test]
    fn test_inject_no_marker() {
        let empty_component = wasm_encoder::Component::new().finish();
        let outcome = inject_js_into_bytes(&empty_component, &["x"]);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No JS injection markers found"));
    }

    #[test]
    fn test_page_align() {
        let cases = [
            (0, 0),
            (1, WASM_PAGE_SIZE),
            (WASM_PAGE_SIZE, WASM_PAGE_SIZE),
            (WASM_PAGE_SIZE + 1, 2 * WASM_PAGE_SIZE),
        ];
        for (addr, expected) in cases {
            assert_eq!(page_align(addr), expected);
        }
    }
}