wasmtime-runtime 6.0.2

Runtime library support for Wasmtime
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
//! Implementation of string transcoding required by the component model.

use anyhow::{anyhow, Result};
use std::cell::Cell;
use std::slice;

/// Bit that `utf16_to_compact_probably_utf16` ORs into the length it returns
/// when the transcoded string had to stay utf16 instead of being deflated to
/// latin1.
const UTF16_TAG: usize = 1 << 31;

/// Macro to define the `VMBuiltinTranscodeArray` type which contains all of the
/// function pointers to the actual transcoder functions. This structure is read
/// by Cranelift-generated code, hence the `repr(C)`.
///
/// Note that this references the `trampolines` module rather than the functions
/// below as the `trampolines` module has the raw ABI.
///
/// This is modeled after the similar macros and usages in `libcalls.rs` and
/// `vmcontext.rs`
///
/// Besides the main arm, this macro has two families of helper arms which are
/// also invoked from the `trampolines` module:
///
/// * `@ty` maps a parameter/result kind to its raw ABI type.
/// * `@retptr` maps a result kind to the type of the extra trailing parameter
///   that is appended to the signature when a result is declared.
macro_rules! define_transcoders {
    // Main arm: receives the full list of transcoder signatures and emits one
    // struct field (a function pointer) per signature, plus an `INIT` constant
    // that fills every field with the same-named function from `trampolines`.
    (
        $(
            $( #[$attr:meta] )*
            $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?;
        )*
    ) => {
        /// An array that stores addresses of builtin functions. We translate code
        /// to use indirect calls. This way, we don't have to patch the code.
        #[repr(C)]
        pub struct VMBuiltinTranscodeArray {
            $(
                // Declared parameters first, then (only when a result is
                // declared) the trailing `@retptr` parameter.
                $name: unsafe extern "C" fn(
                    $(define_transcoders!(@ty $param),)*
                    $(define_transcoders!(@retptr $result),)?
                ) $( -> define_transcoders!(@ty $result))?,
            )*
        }

        impl VMBuiltinTranscodeArray {
            // Fully-populated table pointing at the raw-ABI trampolines.
            pub const INIT: VMBuiltinTranscodeArray = VMBuiltinTranscodeArray {
                $($name: trampolines::$name,)*
            };
        }
    };

    // Raw ABI type for each parameter/result kind. Note that a `size_pair` is
    // a single `usize` at this level; see `@retptr` below for the other half.
    (@ty size) => (usize);
    (@ty size_pair) => (usize);
    (@ty ptr_u8) => (*mut u8);
    (@ty ptr_u16) => (*mut u16);

    // Type of the trailing parameter added for each result kind: a `size_pair`
    // gets a pointer to a `usize`, while a plain `size` gets a unit
    // placeholder.
    (@retptr size_pair) => (*mut usize);
    (@retptr size) => (());
}

// Instantiate `VMBuiltinTranscodeArray` from the transcoder list supplied by
// `wasmtime_environ`.
wasmtime_environ::foreach_transcoder!(define_transcoders);

/// Submodule with macro-generated `extern "C"` functions which are the actual
/// libcall transcoders that are invoked by Cranelift. These functions have a
/// specific ABI defined by the macro itself and will defer to the actual bodies
/// of each implementation following this submodule.
///
/// Each generated function validates its pointer arguments, runs the
/// same-named function from the parent module inside `catch_unwind`, and then
/// either returns the value, raises a trap for an `Err`, or resumes a caught
/// panic through `crate::traphandlers`.
// NOTE(review): the `allow` below is presumably needed because a `size` result
// adds a `()` parameter to the `extern "C"` signature -- confirm.
#[allow(improper_ctypes_definitions)]
mod trampolines {
    macro_rules! transcoders {
        // Main arm: emits one `pub unsafe extern "C" fn` per transcoder
        // signature.
        (
            $(
                $( #[$attr:meta] )*
                $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?;
            )*
        ) => (
            $(
                pub unsafe extern "C" fn $name(
                    $($pname : define_transcoders!(@ty $param),)*
                    // If a result is given then a `size_pair` results gets its
                    // second result value passed via a return pointer here, so
                    // optionally indicate a return pointer.
                    $(_retptr: define_transcoders!(@retptr $result))?
                ) $( -> define_transcoders!(@ty $result))? {
                    // Per-parameter sanity checks (currently only alignment of
                    // 16-bit pointers, see `@validate_param` below).
                    $(transcoders!(@validate_param $pname $param);)*

                    // Always catch panics to avoid trying to unwind from Rust
                    // into Cranelift-generated code which would lead to a Bad
                    // Time.
                    //
                    // Additionally assume that every function below returns a
                    // `Result` where errors turn into traps.
                    let result = std::panic::catch_unwind(|| {
                        super::$name($($pname),*)
                    });
                    match result {
                        // Success: shape the Rust return value into the raw
                        // ABI return value (and return pointer, if any).
                        Ok(Ok(ret)) => transcoders!(@convert_ret ret _retptr $($result)?),
                        // The transcoder reported an error: surface it as a
                        // user trap with a backtrace requested.
                        Ok(Err(err)) => crate::traphandlers::raise_trap(
                            crate::traphandlers::TrapReason::User {
                                error: err,
                                needs_backtrace: true,
                            },
                        ),
                        // The transcoder panicked: hand the payload to the
                        // trap handling machinery to resume it.
                        Err(panic) => crate::traphandlers::resume_panic(panic),
                    }
                }
            )*
        );

        // No declared result, or a plain `size`: the value is returned as-is.
        (@convert_ret $ret:ident $retptr:ident) => ($ret);
        (@convert_ret $ret:ident $retptr:ident size) => ($ret);
        // `size_pair`: the first element is the direct return value and the
        // second element is stored through the return pointer.
        (@convert_ret $ret:ident $retptr:ident size_pair) => ({
            let (a, b) = $ret;
            *$retptr = b;
            a
        });

        (@validate_param $arg:ident ptr_u16) => ({
            // This should already be guaranteed by the canonical ABI and our
            // adapter modules, but double-check here to be extra-sure. If this
            // is a perf concern it can become a `debug_assert!`.
            assert!(($arg as usize) % 2 == 0, "unaligned 16-bit pointer");
        });
        // All other parameter kinds need no validation.
        (@validate_param $arg:ident $ty:ident) => ();
    }

    // Generate a trampoline for every transcoder listed by `wasmtime_environ`.
    wasmtime_environ::foreach_transcoder!(transcoders);
}

/// Asserts that the memory regions backing `a` and `b` are disjoint.
///
/// This property should already be guaranteed by construction in the component
/// model but assert it here to be extra sure. Nothing below is sound if regions
/// can overlap.
///
/// Each slice is treated as the half-open byte range `[start, end)`. Two
/// ranges are disjoint exactly when one of them ends at or before the start of
/// the other, so regions that merely touch (for example the two halves
/// produced by `split_at_mut`) and empty regions are accepted.
///
/// # Panics
///
/// Panics if the two byte ranges share at least one byte.
fn assert_no_overlap<T, U>(a: &[T], b: &[U]) {
    let a_start = a.as_ptr() as usize;
    let a_end = a_start + std::mem::size_of_val(a);
    let b_start = b.as_ptr() as usize;
    let b_end = b_start + std::mem::size_of_val(b);

    // `<=` rather than `<`: the end addresses are exclusive, so `a_end ==
    // b_start` means the regions are adjacent, not overlapping.
    assert!(
        a_end <= b_start || b_end <= a_start,
        "transcoding source and destination regions overlap"
    );
}

/// Validates a utf8 string and copies it into the destination.
///
/// `len` is the byte length of both the `src` and `dst` buffers. Nothing is
/// returned on success; an error is returned (and nothing is copied) if the
/// source bytes are not valid utf8.
unsafe fn utf8_to_utf8(src: *mut u8, len: usize, dst: *mut u8) -> Result<()> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);
    log::trace!("utf8-to-utf8 {len}");
    match std::str::from_utf8(input) {
        Ok(text) => {
            output.copy_from_slice(text.as_bytes());
            Ok(())
        }
        Err(_) => Err(anyhow!("invalid utf8 encoding")),
    }
}

/// Validates a utf16 string and copies it into the destination.
///
/// `len` is the number of 16-bit code units in both the `src` and `dst`
/// buffers. Nothing is returned on success; an error is returned if the source
/// is not valid utf16.
unsafe fn utf16_to_utf16(src: *mut u16, len: usize, dst: *mut u16) -> Result<()> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);
    log::trace!("utf16-to-utf16 {len}");
    // The "was everything latin1" answer is irrelevant for this transcoder.
    run_utf16_to_utf16(input, output).map(|_all_latin1| ())
}

/// Re-encodes the little-endian utf16 in `src` into `dst`, validating it along
/// the way.
///
/// Returns `true` when every decoded code point fits in the latin1 range
/// (`0xff` and below), and an error if `src` is not valid utf16.
fn run_utf16_to_utf16(src: &[u16], mut dst: &mut [u16]) -> Result<bool> {
    let units = src.iter().map(|unit| u16::from_le(*unit));
    let mut only_latin1 = true;
    for decoded in std::char::decode_utf16(units) {
        let ch = match decoded {
            Ok(ch) => ch,
            Err(_) => return Err(anyhow!("invalid utf16 encoding")),
        };
        only_latin1 &= u32::from(ch) <= 0xff;

        // Encode in native endianness, then fix up the freshly written units
        // to little-endian and advance past them.
        let written = ch.encode_utf16(dst).len();
        let (encoded, remaining) = dst.split_at_mut(written);
        for unit in encoded.iter_mut() {
            *unit = unit.to_le();
        }
        dst = remaining;
    }
    Ok(only_latin1)
}

/// Copies a latin1 string into the destination.
///
/// Every byte sequence is a valid latin1 string, so no validation is needed
/// and this is a plain byte-for-byte copy of `len` bytes.
unsafe fn latin1_to_latin1(src: *mut u8, len: usize, dst: *mut u8) -> Result<()> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);
    log::trace!("latin1-to-latin1 {len}");
    for (out, byte) in output.iter_mut().zip(input) {
        *out = *byte;
    }
    Ok(())
}

/// Widens a latin1 string into a utf16 string.
///
/// Each latin1 byte becomes one little-endian u16 code unit holding the same
/// value. `len` is the number of code units in both the source and the
/// destination buffers.
unsafe fn latin1_to_utf16(src: *mut u8, len: usize, dst: *mut u16) -> Result<()> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);
    for (unit, byte) in output.iter_mut().zip(input.iter()) {
        *unit = u16::from(*byte).to_le();
    }
    log::trace!("latin1-to-utf16 {len}");
    Ok(())
}

/// Transcodes a utf8 string into utf16.
///
/// `len` is the unit length of both buffers (bytes for `src`, u16 units for
/// `dst`). On success the number of u16 units written to `dst` is returned;
/// an error is returned if `src` is not valid utf8.
unsafe fn utf8_to_utf16(src: *mut u8, len: usize, dst: *mut u16) -> Result<usize> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);

    run_utf8_to_utf16(input, output).map(|result| {
        log::trace!("utf8-to-utf16 {len} => {result}");
        result
    })
}

/// Validates `src` as utf8 and writes its utf16 encoding, as little-endian
/// code units, to the front of `dst`.
///
/// Returns the number of u16 units written, or an error if `src` is not valid
/// utf8. Writing stops once either the encoded string or `dst` is exhausted.
fn run_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> Result<usize> {
    let text = match std::str::from_utf8(src) {
        Ok(text) => text,
        Err(_) => return Err(anyhow!("invalid utf8 encoding")),
    };
    let mut written = 0;
    for (slot, unit) in dst.iter_mut().zip(text.encode_utf16()) {
        *slot = unit.to_le();
        written += 1;
    }
    Ok(written)
}

/// Transcodes a utf16 string into utf8, possibly only partially.
///
/// The source and destination buffers are sized independently (`src_len` u16
/// units and `dst_len` bytes). The returned pair is the number of source code
/// units consumed and the number of destination bytes written. If the
/// destination runs out of room the transcode stops early at a character
/// boundary. An error is returned if the source is not valid utf16.
unsafe fn utf16_to_utf8(
    src: *mut u16,
    src_len: usize,
    dst: *mut u8,
    dst_len: usize,
) -> Result<(usize, usize)> {
    let input = slice::from_raw_parts(src, src_len);
    let output = slice::from_raw_parts_mut(dst, dst_len);
    assert_no_overlap(input, output);

    // Yields native-endian code units while tallying how many units have been
    // pulled out of the source so far; that tally is how the number of
    // consumed source units is reported back.
    let consumed = Cell::new(0);
    let units = input.iter().map(|unit| {
        consumed.set(consumed.get() + 1);
        u16::from_le(*unit)
    });

    let mut src_read = 0;
    let mut dst_written = 0;

    for decoded in std::char::decode_utf16(units) {
        let ch = decoded.map_err(|_| anyhow!("invalid utf16 encoding"))?;
        let remaining = &mut output[dst_written..];

        // Not enough room left for this character: stop here and let the
        // caller come back later with a larger destination buffer.
        if remaining.len() < 4 && remaining.len() < ch.len_utf8() {
            break;
        }

        // Commit the units consumed for this character and append its utf-8
        // encoding to the destination.
        src_read = consumed.get();
        dst_written += ch.encode_utf8(remaining).len();
    }

    log::trace!("utf16-to-utf8 {src_len}/{dst_len} => {src_read}/{dst_written}");
    Ok((src_read, dst_written))
}

/// Transcodes a latin1 string into utf8, possibly only partially.
///
/// Both buffers are sized independently, in bytes. The returned pair is the
/// number of source bytes read and the number of destination bytes written.
///
/// If the destination is too small only a prefix of the source is encoded.
unsafe fn latin1_to_utf8(
    src: *mut u8,
    src_len: usize,
    dst: *mut u8,
    dst_len: usize,
) -> Result<(usize, usize)> {
    let input = slice::from_raw_parts(src, src_len);
    let output = slice::from_raw_parts_mut(dst, dst_len);
    assert_no_overlap(input, output);

    let progress = encoding_rs::mem::convert_latin1_to_utf8_partial(input, output);
    let (read, written) = progress;
    log::trace!("latin1-to-utf8 {src_len}/{dst_len} => ({read}, {written})");
    Ok(progress)
}

/// Transcodes utf16 into "latin1+utf16", expecting the result to be utf16.
///
/// `len` is the number of u16 units in both the source and the destination.
/// The source is first copied as utf16. If any character lies outside the
/// latin1 code space (above 0xff) the result stays utf16 and `len` tagged with
/// `UTF16_TAG` is returned. Otherwise the destination is deflated in place to
/// one byte per character and the plain latin1 length is returned.
unsafe fn utf16_to_compact_probably_utf16(
    src: *mut u16,
    len: usize,
    dst: *mut u16,
) -> Result<usize> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);

    if !run_utf16_to_utf16(input, output)? {
        log::trace!("utf16-to-compact-probably-utf16 {len} => utf16 {len}");
        return Ok(len | UTF16_TAG);
    }

    // Everything fit in latin1: squeeze each 2-byte unit down to the byte at
    // its even offset, working through a byte view of the destination.
    let (head, bytes, tail) = output.align_to_mut::<u8>();
    assert!(head.is_empty());
    assert!(tail.is_empty());
    for unit in 0..len {
        bytes[unit] = bytes[unit * 2];
    }
    log::trace!("utf16-to-compact-probably-utf16 {len} => latin1 {len}");
    Ok(len)
}

/// Transcodes the latin1-representable prefix of a utf8 string into latin1.
///
/// `len` is the byte length of both the input and the output buffers.
///
/// The returned pair is the number of source bytes read and the number of
/// destination bytes written.
///
/// The whole source is not necessarily consumed: conversion stops at the
/// point reported by `utf8_latin1_up_to`, i.e. before the first usv that
/// latin1 cannot represent.
unsafe fn utf8_to_latin1(src: *mut u8, len: usize, dst: *mut u8) -> Result<(usize, usize)> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);

    let read = encoding_rs::mem::utf8_latin1_up_to(input);
    let convertible = &input[..read];
    let written = encoding_rs::mem::convert_utf8_to_latin1_lossy(convertible, output);
    log::trace!("utf8-to-latin1 {len} => ({read}, {written})");
    Ok((read, written))
}

/// Transcodes the latin1-representable prefix of a utf16 string into latin1.
///
/// Parameters and results mirror `utf8_to_latin1`: `len` is the unit length of
/// both buffers and the returned pair is (units read, bytes written), which
/// are always equal here. Conversion stops at the first code unit that does
/// not fit in a byte.
unsafe fn utf16_to_latin1(src: *mut u16, len: usize, dst: *mut u8) -> Result<(usize, usize)> {
    let input = slice::from_raw_parts(src, len);
    let output = slice::from_raw_parts_mut(dst, len);
    assert_no_overlap(input, output);

    let mut size = 0;
    for (unit, byte) in input.iter().zip(output.iter_mut()) {
        if let Ok(narrow) = u8::try_from(u16::from_le(*unit)) {
            *byte = narrow;
        } else {
            break;
        }
        size += 1;
    }
    log::trace!("utf16-to-latin1 {len} => {size}");
    Ok((size, size))
}

/// Finishes a utf8 to utf16 transcode whose beginning was already written as
/// latin1.
///
/// A previous `utf8_to_latin1` call transcoded only a prefix of the original
/// string because the rest was not representable in latin1. `src`/`src_len`
/// describe the part of the source that is still left to transcode.
///
/// `dst`/`dst_len` describe the whole destination buffer. Its first
/// `latin1_bytes_so_far` bytes (bytes, not code units) already hold latin1
/// characters; these are first inflated in place into utf16 code units.
///
/// The entirety of `src` is then transcoded into the space that follows the
/// inflated prefix. The returned value is the number of u16 units written by
/// that transcode plus `latin1_bytes_so_far`.
unsafe fn utf8_to_compact_utf16(
    src: *mut u8,
    src_len: usize,
    dst: *mut u16,
    dst_len: usize,
    latin1_bytes_so_far: usize,
) -> Result<usize> {
    let input = slice::from_raw_parts(src, src_len);
    let output = slice::from_raw_parts_mut(dst, dst_len);
    assert_no_overlap(input, output);

    let tail = inflate_latin1_bytes(output, latin1_bytes_so_far);
    let result = run_utf8_to_utf16(input, tail)?;
    log::trace!("utf8-to-compact-utf16 {src_len}/{dst_len}/{latin1_bytes_so_far} => {result}");
    let total = result + latin1_bytes_so_far;
    Ok(total)
}

/// Counterpart of `utf8_to_compact_utf16` for utf16 source strings.
///
/// The first `latin1_bytes_so_far` bytes of `dst` are inflated in place to
/// utf16, then all of `src` is validated and copied into the space after
/// them. Returns the source length in code units plus `latin1_bytes_so_far`.
unsafe fn utf16_to_compact_utf16(
    src: *mut u16,
    src_len: usize,
    dst: *mut u16,
    dst_len: usize,
    latin1_bytes_so_far: usize,
) -> Result<usize> {
    let input = slice::from_raw_parts(src, src_len);
    let output = slice::from_raw_parts_mut(dst, dst_len);
    assert_no_overlap(input, output);

    let tail = inflate_latin1_bytes(output, latin1_bytes_so_far);
    run_utf16_to_utf16(input, tail)?;
    // A utf16 to utf16 copy writes exactly as many units as it reads.
    let result = input.len();
    log::trace!("utf16-to-compact-utf16 {src_len}/{dst_len}/{latin1_bytes_so_far} => {result}");
    let total = result + latin1_bytes_so_far;
    Ok(total)
}

/// Expands the first `latin1_bytes_so_far` bytes of `dst`, which hold latin1
/// characters, into that many u16 code units in place.
///
/// The returned slice is whatever is left of `dst` after the inflated prefix,
/// i.e. the space still available for further transcoding.
fn inflate_latin1_bytes(dst: &mut [u16], latin1_bytes_so_far: usize) -> &mut [u16] {
    // `latin1_bytes_so_far` counts bytes whereas `dst` is indexed in u16
    // units. Splitting at that count is nonetheless right: every latin1 byte
    // grows into one full 2-byte code unit of the destination.
    let (prefix, remaining) = dst.split_at_mut(latin1_bytes_so_far);

    // View the prefix as raw bytes so the packed latin1 bytes can be spread
    // out over their final positions.
    let (head, bytes, tail) = unsafe { prefix.align_to_mut::<u8>() };
    assert!(head.is_empty());
    assert!(tail.is_empty());

    // Walk from the back so that no latin1 byte is overwritten before it has
    // been moved.
    let mut idx = latin1_bytes_so_far;
    while idx > 0 {
        idx -= 1;
        bytes[2 * idx] = bytes[idx];
        bytes[2 * idx + 1] = 0;
    }

    remaining
}