1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// Copyright (c) The Diem Core Contributors
// Copyright (c) The Move Contributors
// SPDX-License-Identifier: Apache-2.0

//! Implementation of native functions for utf8 strings.

use crate::natives::helpers::make_module_natives;
use move_binary_format::errors::PartialVMResult;
use move_vm_runtime::native_functions::{NativeContext, NativeFunction};
use move_vm_types::{
    loaded_data::runtime_types::Type,
    natives::function::NativeResult,
    pop_arg,
    values::{Value, VectorRef},
};
use std::{collections::VecDeque, sync::Arc};

// The implementation approach delegates all utf8 handling to Rust.
// This is possible without copying any bytes because (a) we can
// get a `std::cell::Ref<Vec<u8>>` from a `vector<u8>`, and in turn a `&[u8]`
// from that, and (b) assuming that the `vector<u8>` embedded in a string
// is already valid utf8, we can use `str::from_utf8_unchecked` to
// create a `&str` view on the bytes without a copy. Once we have this
// view, we can call utf8 functions like length, substring, etc.

/***************************************************************************************************
 * native fun internal_check_utf8
 *
 *   gas cost: base_cost + unit_cost * length_in_bytes
 *
 **************************************************************************************************/
/// Gas parameters for the `internal_check_utf8` native.
///
/// The charged cost is `base_cost + unit_cost * length_in_bytes`, where
/// `length_in_bytes` is the length of the byte vector being checked.
#[derive(Debug, Clone)]
pub struct CheckUtf8GasParameters {
    /// Flat amount charged on every call.
    pub base_cost: u64,
    /// Amount charged per byte of the checked vector.
    pub unit_cost: u64,
}

/// Native implementation of `internal_check_utf8`.
///
/// Pops the single byte-vector reference argument and returns a `bool` that is
/// `true` exactly when the bytes form valid UTF-8.
///
/// Gas: `base_cost + unit_cost * length_in_bytes`.
fn native_check_utf8(
    gas_params: &CheckUtf8GasParameters,
    _context: &mut NativeContext,
    _ty_args: Vec<Type>,
    mut args: VecDeque<Value>,
) -> PartialVMResult<NativeResult> {
    debug_assert!(args.len() == 1);
    let bytes_arg = pop_arg!(args, VectorRef);
    let bytes = bytes_arg.as_bytes_ref();

    // The charge is proportional to the input size, independent of the outcome.
    let byte_len = bytes.as_slice().len() as u64;
    // TODO: extensible native cost tables
    let cost = gas_params.base_cost + gas_params.unit_cost * byte_len;

    let is_valid = std::str::from_utf8(bytes.as_slice()).is_ok();
    NativeResult::map_partial_vm_result_one(cost, Ok(Value::bool(is_valid)))
}

/// Builds the `NativeFunction` for `internal_check_utf8`.
///
/// The returned closure takes ownership of `gas_params` and forwards every
/// invocation to `native_check_utf8`.
pub fn make_native_check_utf8(gas_params: CheckUtf8GasParameters) -> NativeFunction {
    Arc::new(move |ctx, type_args, arguments| {
        native_check_utf8(&gas_params, ctx, type_args, arguments)
    })
}

/***************************************************************************************************
 * native fun internal_is_char_boundary
 *
 *   gas cost: base_cost
 *
 **************************************************************************************************/
/// Gas parameters for the `internal_is_char_boundary` native.
///
/// The charged cost is the flat `base_cost`; it does not depend on the input.
#[derive(Debug, Clone)]
pub struct IsCharBoundaryGasParameters {
    /// Flat amount charged on every call.
    pub base_cost: u64,
}

/// Native implementation of `internal_is_char_boundary`.
///
/// Pops a `u64` byte index and then a byte-vector reference, and returns a
/// `bool` telling whether that index falls on a UTF-8 character boundary of
/// the string stored in the vector.
///
/// Gas: `base_cost`.
fn native_is_char_boundary(
    gas_params: &IsCharBoundaryGasParameters,
    _context: &mut NativeContext,
    _ty_args: Vec<Type>,
    mut args: VecDeque<Value>,
) -> PartialVMResult<NativeResult> {
    debug_assert!(args.len() == 2);
    let index = pop_arg!(args, u64) as usize;
    let bytes_arg = pop_arg!(args, VectorRef);
    let bytes = bytes_arg.as_bytes_ref();

    // SAFETY: this native does not validate the bytes itself; it relies on the
    // invariant that the bytes handed to it are already valid utf8.
    let text = unsafe { std::str::from_utf8_unchecked(bytes.as_slice()) };
    let on_boundary = text.is_char_boundary(index);

    NativeResult::map_partial_vm_result_one(gas_params.base_cost, Ok(Value::bool(on_boundary)))
}

/// Builds the `NativeFunction` for `internal_is_char_boundary`.
///
/// The returned closure takes ownership of `gas_params` and forwards every
/// invocation to `native_is_char_boundary`.
pub fn make_native_is_char_boundary(gas_params: IsCharBoundaryGasParameters) -> NativeFunction {
    Arc::new(move |ctx, type_args, arguments| {
        native_is_char_boundary(&gas_params, ctx, type_args, arguments)
    })
}

/***************************************************************************************************
 * native fun internal_sub_string
 *
 *   gas cost: base_cost + unit_cost * sub_string_length_in_bytes
 *
 **************************************************************************************************/
/// Gas parameters for the `internal_sub_string` native.
///
/// The charged cost is `base_cost + unit_cost * sub_string_length_in_bytes`.
/// When the call aborts because the end index precedes the start index, only
/// `base_cost` is charged.
#[derive(Debug, Clone)]
pub struct SubStringGasParameters {
    /// Flat amount charged on every call.
    pub base_cost: u64,
    /// Amount charged per byte of the extracted substring.
    pub unit_cost: u64,
}

/// Native implementation of `internal_sub_string`.
///
/// Pops the end index `j`, the start index `i` (both `u64` byte offsets) and
/// then a byte-vector reference, and returns a fresh `vector<u8>` holding the
/// bytes of the string in the half-open range `[i, j)`.
///
/// Aborts with code `1`, charging only `base_cost`, when the range is invalid:
/// `j < i`, `j` past the end of the string, or either index not on a UTF-8
/// character boundary. The out-of-bounds and boundary cases previously caused
/// a Rust panic inside the VM instead of a clean abort.
///
/// Gas on success: `base_cost + unit_cost * (j - i)`.
fn native_sub_string(
    gas_params: &SubStringGasParameters,
    _context: &mut NativeContext,
    _ty_args: Vec<Type>,
    mut args: VecDeque<Value>,
) -> PartialVMResult<NativeResult> {
    debug_assert!(args.len() == 3);
    let j = pop_arg!(args, u64) as usize;
    let i = pop_arg!(args, u64) as usize;

    if j < i {
        // TODO: what abort code should we use here?
        return Ok(NativeResult::err(gas_params.base_cost, 1));
    }

    let s_arg = pop_arg!(args, VectorRef);
    let s_ref = s_arg.as_bytes_ref();
    // SAFETY: this native does not validate the bytes itself; it relies on the
    // invariant that the bytes handed to it are already valid utf8.
    let s_str = unsafe { std::str::from_utf8_unchecked(s_ref.as_slice()) };

    // Checked slicing: `get` yields `None` instead of panicking when the range
    // is out of bounds or splits a multi-byte character.
    let sub = match s_str.get(i..j) {
        Some(sub) => sub,
        // Same abort code and charge as the `j < i` guard above.
        None => return Ok(NativeResult::err(gas_params.base_cost, 1)),
    };
    let v = Value::vector_u8(sub.bytes());

    let cost = gas_params.base_cost + gas_params.unit_cost * (j - i) as u64;
    NativeResult::map_partial_vm_result_one(cost, Ok(v))
}

/// Builds the `NativeFunction` for `internal_sub_string`.
///
/// The returned closure takes ownership of `gas_params` and forwards every
/// invocation to `native_sub_string`.
pub fn make_native_sub_string(gas_params: SubStringGasParameters) -> NativeFunction {
    Arc::new(move |ctx, type_args, arguments| {
        native_sub_string(&gas_params, ctx, type_args, arguments)
    })
}

/***************************************************************************************************
 * native fun internal_index_of
 *
 *   gas cost: base_cost + unit_cost * bytes_searched
 *
 **************************************************************************************************/
/// Gas parameters for the `internal_index_of` native.
///
/// The charged cost is `base_cost + unit_cost * bytes_searched`. On a match,
/// `bytes_searched` is the match position plus the length of the pattern; with
/// no match it is the length of the searched string.
#[derive(Debug, Clone)]
pub struct IndexOfGasParameters {
    /// Flat amount charged on every call.
    pub base_cost: u64,
    /// Amount charged per byte searched.
    pub unit_cost: u64,
}

/// Native implementation of `internal_index_of`.
///
/// Pops the pattern (byte-vector reference) and then the string to search
/// (byte-vector reference). Returns, as a `u64`, the byte offset of the first
/// occurrence of the pattern, or the byte length of the searched string when
/// the pattern does not occur.
///
/// Gas: `base_cost + unit_cost * bytes_searched`, where `bytes_searched` is
/// `position + pattern_len` when the returned position is smaller than the
/// string length, and the string length otherwise.
fn native_index_of(
    gas_params: &IndexOfGasParameters,
    _context: &mut NativeContext,
    _ty_args: Vec<Type>,
    mut args: VecDeque<Value>,
) -> PartialVMResult<NativeResult> {
    debug_assert!(args.len() == 2);

    let needle_arg = pop_arg!(args, VectorRef);
    let needle_bytes = needle_arg.as_bytes_ref();
    // SAFETY: this native does not validate the bytes itself; it relies on the
    // invariant that the bytes handed to it are already valid utf8.
    let needle = unsafe { std::str::from_utf8_unchecked(needle_bytes.as_slice()) };

    let haystack_arg = pop_arg!(args, VectorRef);
    let haystack_bytes = haystack_arg.as_bytes_ref();
    // SAFETY: same invariant as for the pattern above.
    let haystack = unsafe { std::str::from_utf8_unchecked(haystack_bytes.as_slice()) };

    // "Not found" is reported as the length of the searched string.
    let haystack_len = haystack.len();
    let pos = haystack.find(needle).unwrap_or(haystack_len);

    let bytes_searched = if pos < haystack_len {
        pos + needle.len()
    } else {
        pos
    };
    let cost = gas_params.base_cost + gas_params.unit_cost * bytes_searched as u64;

    NativeResult::map_partial_vm_result_one(cost, Ok(Value::u64(pos as u64)))
}

/// Builds the `NativeFunction` for `internal_index_of`.
///
/// The returned closure takes ownership of `gas_params` and forwards every
/// invocation to `native_index_of`.
pub fn make_native_index_of(gas_params: IndexOfGasParameters) -> NativeFunction {
    Arc::new(move |ctx, type_args, arguments| {
        native_index_of(&gas_params, ctx, type_args, arguments)
    })
}

/***************************************************************************************************
 * module
 **************************************************************************************************/
/// Gas parameters for every native function in this module, one field per
/// native. Consumed by `make_all` when building the native function table.
#[derive(Debug, Clone)]
pub struct GasParameters {
    /// Parameters for `internal_check_utf8`.
    pub check_utf8: CheckUtf8GasParameters,
    /// Parameters for `internal_is_char_boundary`.
    pub is_char_boundary: IsCharBoundaryGasParameters,
    /// Parameters for `internal_sub_string`.
    pub sub_string: SubStringGasParameters,
    /// Parameters for `internal_index_of`.
    pub index_of: IndexOfGasParameters,
}

pub fn make_all(gas_params: GasParameters) -> impl Iterator<Item = (String, NativeFunction)> {
    let natives = [
        (
            "internal_check_utf8",
            make_native_check_utf8(gas_params.check_utf8),
        ),
        (
            "internal_is_char_boundary",
            make_native_is_char_boundary(gas_params.is_char_boundary),
        ),
        (
            "internal_sub_string",
            make_native_sub_string(gas_params.sub_string),
        ),
        (
            "internal_index_of",
            make_native_index_of(gas_params.index_of),
        ),
    ];

    make_module_natives(natives)
}