ckb_script/
types.rs

1//! Common type definitions for ckb-script package.
2
3use crate::{error::ScriptError, verify_env::TxVerifyEnv};
4use ckb_chain_spec::consensus::Consensus;
5use ckb_types::{
6    core::{
7        Cycle, ScriptHashType,
8        cell::{CellMeta, ResolvedTransaction},
9    },
10    packed::{Byte32, CellOutput, OutPoint, Script},
11    prelude::*,
12};
13use ckb_vm::{
14    ISA_B, ISA_IMC, ISA_MOP, Syscalls,
15    machine::{VERSION0, VERSION1, VERSION2},
16};
17use serde::{Deserialize, Serialize};
18use std::collections::{BTreeMap, HashMap};
19use std::fmt;
20use std::sync::{
21    Arc, Mutex, RwLock,
22    atomic::{AtomicU64, Ordering},
23};
24
25use ckb_traits::CellDataProvider;
26use ckb_vm::snapshot2::Snapshot2Context;
27
28use ckb_vm::{
29    DefaultMachineRunner, RISCV_GENERAL_REGISTER_NUMBER, SupportMachine,
30    bytes::Bytes,
31    machine::Pause,
32    snapshot2::{DataSource, Snapshot2},
33};
34use std::mem::size_of;
35
36/// The type of CKB-VM ISA.
37pub type VmIsa = u8;
/// The type of CKB-VM version.
39pub type VmVersion = u32;
40
41/// The default machine type when asm feature is enabled. Note that ckb-script now functions
42/// solely based on ckb_vm::DefaultMachineRunner trait. The type provided here is only for
43/// default implementations.
44#[cfg(has_asm)]
45pub type Machine = ckb_vm::machine::asm::AsmMachine;
/// The default machine type when neither the asm feature nor the flatmemory feature is enabled
47#[cfg(all(not(has_asm), not(feature = "flatmemory")))]
48pub type Machine = ckb_vm::TraceMachine<
49    ckb_vm::DefaultCoreMachine<u64, ckb_vm::WXorXMemory<ckb_vm::SparseMemory<u64>>>,
50>;
51/// The default machine type when asm feature is not enabled, but flatmemory is enabled
52#[cfg(all(not(has_asm), feature = "flatmemory"))]
53pub type Machine = ckb_vm::TraceMachine<
54    ckb_vm::DefaultCoreMachine<u64, ckb_vm::WXorXMemory<ckb_vm::FlatMemory<u64>>>,
55>;
56
57/// Debug printer function type
58pub type DebugPrinter = Arc<dyn Fn(&Byte32, &str) + Send + Sync>;
59/// Syscall generator function type
60pub type SyscallGenerator<DL, V, M> =
61    fn(&VmId, &SgData<DL>, &VmContext<DL>, &V) -> Vec<Box<(dyn Syscalls<M>)>>;
62
63/// The version of CKB Script Verifier.
64#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
65pub enum ScriptVersion {
66    /// CKB VM 0 with Syscall version 1.
67    V0 = 0,
68    /// CKB VM 1 with Syscall version 1 and version 2.
69    V1 = 1,
70    /// CKB VM 2 with Syscall version 1, version 2 and version 3.
71    V2 = 2,
72}
73
74impl ScriptVersion {
75    /// Returns the latest version.
76    pub const fn latest() -> Self {
77        Self::V2
78    }
79
80    /// Returns the ISA set of CKB VM in current script version.
81    pub fn vm_isa(self) -> VmIsa {
82        match self {
83            Self::V0 => ISA_IMC,
84            Self::V1 => ISA_IMC | ISA_B | ISA_MOP,
85            Self::V2 => ISA_IMC | ISA_B | ISA_MOP,
86        }
87    }
88
89    /// Returns the version of CKB VM in current script version.
90    pub fn vm_version(self) -> VmVersion {
91        match self {
92            Self::V0 => VERSION0,
93            Self::V1 => VERSION1,
94            Self::V2 => VERSION2,
95        }
96    }
97
98    /// Returns the specific data script hash type.
99    ///
100    /// Returns:
101    /// - `ScriptHashType::Data` for version 0;
102    /// - `ScriptHashType::Data1` for version 1;
103    pub fn data_hash_type(self) -> ScriptHashType {
104        match self {
105            Self::V0 => ScriptHashType::Data,
106            Self::V1 => ScriptHashType::Data1,
107            Self::V2 => ScriptHashType::Data2,
108        }
109    }
110
111    /// Creates a CKB VM core machine without cycles limit.
112    ///
113    /// In fact, there is still a limit of `max_cycles` which is set to `2^64-1`.
114    pub fn init_core_machine_without_limit(self) -> <Machine as DefaultMachineRunner>::Inner {
115        self.init_core_machine(u64::MAX)
116    }
117
118    /// Creates a CKB VM core machine.
119    pub fn init_core_machine(self, max_cycles: Cycle) -> <Machine as DefaultMachineRunner>::Inner {
120        let isa = self.vm_isa();
121        let version = self.vm_version();
122        <<Machine as DefaultMachineRunner>::Inner as SupportMachine>::new(isa, version, max_cycles)
123    }
124}
125
126/// A script group is defined as scripts that share the same hash.
127///
128/// A script group will only be executed once per transaction, the
129/// script itself should check against all inputs/outputs in its group
130/// if needed.
131#[derive(Clone, Debug)]
132pub struct ScriptGroup {
133    /// The script.
134    ///
135    /// A script group is a group of input and output cells that share the same script.
136    pub script: Script,
137    /// The script group type.
138    pub group_type: ScriptGroupType,
139    /// Indices of input cells.
140    pub input_indices: Vec<usize>,
141    /// Indices of output cells.
142    pub output_indices: Vec<usize>,
143}
144
145/// The methods included here are defected in a way: all construction
146/// methods here create ScriptGroup without any `input_indices` or
147/// `output_indices` filled. One has to manually fill them later(or forgot
148/// about this).
149/// As a result, we are marking them as crate-only methods for now. This
150/// forces users to one of the following 2 solutions:
151/// * Call `groups()` on `TxData` so they can fetch `ScriptGroup` data with
152///   all correct data filled.
153/// * Manually construct the struct where they have to think what shall be
154///   used for `input_indices` and `output_indices`.
155impl ScriptGroup {
156    /// Creates a new script group struct.
157    pub(crate) fn new(script: &Script, group_type: ScriptGroupType) -> Self {
158        Self {
159            group_type,
160            script: script.to_owned(),
161            input_indices: vec![],
162            output_indices: vec![],
163        }
164    }
165
166    /// Creates a lock script group.
167    pub(crate) fn from_lock_script(script: &Script) -> Self {
168        Self::new(script, ScriptGroupType::Lock)
169    }
170
171    /// Creates a type script group.
172    pub(crate) fn from_type_script(script: &Script) -> Self {
173        Self::new(script, ScriptGroupType::Type)
174    }
175}
176
177/// The script group type.
178///
/// A cell can have a lock script and an optional type script. Even if they reference the same
/// script, the lock script and the type script will not be grouped together.
181#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
182#[serde(rename_all = "snake_case")]
183pub enum ScriptGroupType {
184    /// Lock script group.
185    Lock,
186    /// Type script group.
187    Type,
188}
189
190impl fmt::Display for ScriptGroupType {
191    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192        match self {
193            ScriptGroupType::Lock => write!(f, "Lock"),
194            ScriptGroupType::Type => write!(f, "Type"),
195        }
196    }
197}
198
/// Struct that records which script has been verified so far.
/// The state is lifetime free, but capturing a snapshot requires a heavy memory copy.
201#[derive(Clone)]
202pub struct TransactionState {
203    /// current suspended script index
204    pub current: usize,
205    /// vm scheduler suspend state
206    pub state: Option<FullSuspendedState>,
207    /// current consumed cycle
208    pub current_cycles: Cycle,
209    /// limit cycles
210    pub limit_cycles: Cycle,
211}
212
213impl TransactionState {
214    /// Creates a new TransactionState struct
215    pub fn new(
216        state: Option<FullSuspendedState>,
217        current: usize,
218        current_cycles: Cycle,
219        limit_cycles: Cycle,
220    ) -> Self {
221        TransactionState {
222            current,
223            state,
224            current_cycles,
225            limit_cycles,
226        }
227    }
228
229    /// Return next limit cycles according to max_cycles and step_cycles
230    pub fn next_limit_cycles(&self, step_cycles: Cycle, max_cycles: Cycle) -> (Cycle, bool) {
231        let remain = max_cycles - self.current_cycles;
232        let next_limit = self.limit_cycles + step_cycles;
233
234        if next_limit < remain {
235            (next_limit, false)
236        } else {
237            (remain, true)
238        }
239    }
240}
241
242/// Enum represent resumable verify result
243#[allow(clippy::large_enum_variant)]
244#[derive(Debug)]
245pub enum VerifyResult {
246    /// Completed total cycles
247    Completed(Cycle),
248    /// Suspended state
249    Suspended(TransactionState),
250}
251
252impl std::fmt::Debug for TransactionState {
253    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> std::fmt::Result {
254        f.debug_struct("TransactionState")
255            .field("current", &self.current)
256            .field("current_cycles", &self.current_cycles)
257            .field("limit_cycles", &self.limit_cycles)
258            .finish()
259    }
260}
261
262/// ChunkCommand is used to control the verification process to suspend or resume
263#[derive(Eq, PartialEq, Clone, Debug)]
264pub enum ChunkCommand {
265    /// Suspend the verification process
266    Suspend,
267    /// Resume the verification process
268    Resume,
269    /// Stop the verification process
270    Stop,
271}
272
273/// VM id type
274pub type VmId = u64;
/// The first VM booted always has 0 as its ID
276pub const FIRST_VM_ID: VmId = 0;
277
/// File descriptor
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Fd(pub u64);

/// The first FD to be used
pub const FIRST_FD_SLOT: u64 = 2;

impl Fd {
    /// Allocates the two fds of a new pipe starting at `slot`.
    ///
    /// Returns `(Fd(slot), Fd(slot + 1), slot + 2)`, the last element being
    /// the next free fd slot. With an even `slot` (as `FIRST_FD_SLOT` is), the
    /// first fd is the read end and the second the write end according to
    /// `is_read` / `is_write`.
    pub fn create(slot: u64) -> (Fd, Fd, u64) {
        let first = Fd(slot);
        let second = Fd(slot + 1);
        let next_slot = slot + 2;
        (first, second, next_slot)
    }

    /// Returns the fd at the opposite end of the same pipe, obtained by
    /// flipping the lowest bit of the fd number.
    pub fn other_fd(&self) -> Fd {
        let Fd(raw) = *self;
        Fd(raw ^ 1)
    }

    /// Tells whether this fd is the read end of a pipe (even fd number).
    pub fn is_read(&self) -> bool {
        (self.0 & 1) == 0
    }

    /// Tells whether this fd is the write end of a pipe (odd fd number).
    pub fn is_write(&self) -> bool {
        !self.is_read()
    }
}
306
307/// VM is in waiting-to-read state.
308#[derive(Clone, Debug, PartialEq, Eq, Hash)]
309pub struct ReadState {
310    /// FD to read from
311    pub fd: Fd,
312    /// Length to read
313    pub length: u64,
314    /// VM address to read data into
315    pub buffer_addr: u64,
316    /// Length address to keep final read length
317    pub length_addr: u64,
318}
319
320/// VM is in waiting-to-write state.
321#[derive(Clone, Debug, PartialEq, Eq, Hash)]
322pub struct WriteState {
323    /// FD to write to
324    pub fd: Fd,
325    /// Bytes that have already been written
326    pub consumed: u64,
327    /// Length to write
328    pub length: u64,
329    /// VM address to write data from
330    pub buffer_addr: u64,
    /// VM address to keep the final written length
332    pub length_addr: u64,
333}
334
335/// VM State.
336#[derive(Clone, Debug, PartialEq, Eq, Hash)]
337pub enum VmState {
338    /// Runnable.
339    Runnable,
340    /// Terminated.
341    Terminated,
342    /// Wait.
343    Wait {
344        /// Target vm id.
345        target_vm_id: VmId,
346        /// Exit code addr.
347        exit_code_addr: u64,
348    },
349    /// WaitForWrite.
350    WaitForWrite(WriteState),
351    /// WaitForRead.
352    WaitForRead(ReadState),
353}
354
355/// Used to specify the location of script data.
356#[derive(Clone, Debug)]
357pub struct DataLocation {
358    /// A pointer to the data.
359    pub data_piece_id: DataPieceId,
360    /// Data offset.
361    pub offset: u64,
362    /// Data length.
363    pub length: u64,
364}
365
366/// Arguments for exec syscall
367#[derive(Clone, Debug)]
368pub struct ExecV2Args {
369    /// Data location for the program to invoke
370    pub location: DataLocation,
371    /// Argc
372    pub argc: u64,
373    /// Argv
374    pub argv: u64,
375}
376
377/// Arguments for spawn syscall
378#[derive(Clone, Debug)]
379pub struct SpawnArgs {
380    /// Data location for the program to spawn
381    pub location: DataLocation,
382    /// Argc
383    pub argc: u64,
384    /// Argv
385    pub argv: u64,
386    /// File descriptors to pass to spawned child process
387    pub fds: Vec<Fd>,
388    /// VM address to keep pid for spawned child process
389    pub process_id_addr: u64,
390}
391
392/// Arguments for wait syscall
393#[derive(Clone, Debug)]
394pub struct WaitArgs {
395    /// VM ID to wait for termination
396    pub target_id: VmId,
397    /// VM address to keep exit code for the waited process
398    pub exit_code_addr: u64,
399}
400
401/// Arguments for pipe syscall
402#[derive(Clone, Debug)]
403pub struct PipeArgs {
404    /// VM address to keep the first created file descriptor
405    pub fd1_addr: u64,
406    /// VM address to keep the second created file descriptor
407    pub fd2_addr: u64,
408}
409
410/// Arguments shared by read, write and inherited fd syscalls
411#[derive(Clone, Debug)]
412pub struct FdArgs {
    /// For read and write syscalls, this contains the file descriptor to use
414    pub fd: Fd,
415    /// Length to read or length to write for read/write syscalls. Inherited
416    /// fd syscall will ignore this field.
417    pub length: u64,
418    /// VM address to keep returned data
419    pub buffer_addr: u64,
420    /// VM address for a input / output length buffer.
421    /// For read / write syscalls, this contains the actual data length.
422    /// For inherited fd syscall, this contains the number of file descriptors.
423    pub length_addr: u64,
424}
425
426/// Inter-process message, this is now used for implementing syscalls, but might
427/// be expanded for more usages later.
428#[derive(Clone, Debug)]
429pub enum Message {
430    /// Exec syscall
431    ExecV2(VmId, ExecV2Args),
432    /// Spawn syscall
433    Spawn(VmId, SpawnArgs),
434    /// Wait syscall
435    Wait(VmId, WaitArgs),
436    /// Pipe syscall
437    Pipe(VmId, PipeArgs),
438    /// Read syscall
439    FdRead(VmId, FdArgs),
440    /// Write syscall
441    FdWrite(VmId, FdArgs),
442    /// Inherited FD syscall
443    InheritedFileDescriptor(VmId, FdArgs),
444    /// Close syscall
445    Close(VmId, Fd),
446}
447
448/// A pointer to the data that is part of the transaction.
449#[derive(Clone, Debug, PartialEq, Eq, Hash)]
450pub enum DataPieceId {
451    /// The nth input cell data.
452    Input(u32),
453    /// The nth output data.
454    Output(u32),
455    /// The nth cell dep cell data.
456    CellDep(u32),
457    /// The nth group input cell data.
458    GroupInput(u32),
459    /// The nth group output data.
460    GroupOutput(u32),
461    /// The nth witness.
462    Witness(u32),
463    /// The nth witness group input.
464    WitnessGroupInput(u32),
465    /// The nth witness group output.
466    WitnessGroupOutput(u32),
467}
468
469impl TryFrom<(u64, u64, u64)> for DataPieceId {
470    type Error = String;
471
472    fn try_from(value: (u64, u64, u64)) -> Result<Self, Self::Error> {
473        let (source, index, place) = value;
474        let index: u32 =
475            u32::try_from(index).map_err(|e| format!("Error casting index to u32: {}", e))?;
476        match (source, place) {
477            (1, 0) => Ok(DataPieceId::Input(index)),
478            (2, 0) => Ok(DataPieceId::Output(index)),
479            (3, 0) => Ok(DataPieceId::CellDep(index)),
480            (0x0100000000000001, 0) => Ok(DataPieceId::GroupInput(index)),
481            (0x0100000000000002, 0) => Ok(DataPieceId::GroupOutput(index)),
482            (1, 1) => Ok(DataPieceId::Witness(index)),
483            (2, 1) => Ok(DataPieceId::Witness(index)),
484            (0x0100000000000001, 1) => Ok(DataPieceId::WitnessGroupInput(index)),
485            (0x0100000000000002, 1) => Ok(DataPieceId::WitnessGroupOutput(index)),
486            _ => Err(format!("Invalid source value: {:#x}", source)),
487        }
488    }
489}
490
491/// Full state representing all VM instances from verifying a CKB script.
492/// It should be serializable to binary formats, while also be able to
493/// fully recover the running environment with the full transaction environment.
494#[derive(Clone, Debug)]
495pub struct FullSuspendedState {
496    /// Total executed cycles
497    pub total_cycles: Cycle,
498    /// Iteration cycles. Due to an implementation bug in Meepo hardfork,
499    /// this value will not always be zero at visible execution boundaries.
500    /// We will have to preserve this value.
501    pub iteration_cycles: Cycle,
502    /// Next available VM ID
503    pub next_vm_id: VmId,
504    /// Next available file descriptor
505    pub next_fd_slot: u64,
506    /// Suspended VMs
507    pub vms: Vec<(VmId, VmState, Snapshot2<DataPieceId>)>,
508    /// Opened file descriptors with owners
509    pub fds: Vec<(Fd, VmId)>,
510    /// Inherited file descriptors for each spawned process
511    pub inherited_fd: Vec<(VmId, Vec<Fd>)>,
512    /// Terminated VMs with exit codes
513    pub terminated_vms: Vec<(VmId, i8)>,
514    /// Currently instantiated VMs. Upon resumption, those VMs will
515    /// be instantiated.
516    pub instantiated_ids: Vec<VmId>,
517}
518
519impl FullSuspendedState {
520    /// Calculates the size of current suspended state, should be used
521    /// to derive cycles charged for suspending / resuming.
522    pub fn size(&self) -> u64 {
523        (size_of::<Cycle>()
524            + size_of::<VmId>()
525            + size_of::<u64>()
526            + size_of::<u64>()
527            + self.vms.iter().fold(0, |mut acc, (_, _, snapshot)| {
528                acc += size_of::<VmId>() + size_of::<VmState>();
529                acc += snapshot.pages_from_source.len()
530                    * (size_of::<u64>()
531                        + size_of::<u8>()
532                        + size_of::<DataPieceId>()
533                        + size_of::<u64>()
534                        + size_of::<u64>());
535                for dirty_page in &snapshot.dirty_pages {
536                    acc += size_of::<u64>() + size_of::<u8>() + dirty_page.2.len();
537                }
538                acc += size_of::<u32>()
539                    + RISCV_GENERAL_REGISTER_NUMBER * size_of::<u64>()
540                    + size_of::<u64>()
541                    + size_of::<u64>()
542                    + size_of::<u64>();
543                acc
544            })
545            + (self.fds.len() * (size_of::<Fd>() + size_of::<VmId>()))) as u64
546            + (self.inherited_fd.len() * (size_of::<Fd>())) as u64
547            + (self.terminated_vms.len() * (size_of::<VmId>() + size_of::<i8>())) as u64
548            + (self.instantiated_ids.len() * size_of::<VmId>()) as u64
549    }
550}
551
552/// A cell that is either loaded, or not yet loaded.
553#[derive(Debug, PartialEq, Eq, Clone)]
554pub enum DataGuard {
555    /// Un-loaded out point
556    NotLoaded(OutPoint),
557    /// Loaded data
558    Loaded(Bytes),
559}
560
561/// LazyData wrapper make sure not-loaded data will be loaded only after one access
562#[derive(Debug, Clone)]
563pub struct LazyData(Arc<RwLock<DataGuard>>);
564
565impl LazyData {
566    fn from_cell_meta(cell_meta: &CellMeta) -> LazyData {
567        match &cell_meta.mem_cell_data {
568            Some(data) => LazyData(Arc::new(RwLock::new(DataGuard::Loaded(data.to_owned())))),
569            None => LazyData(Arc::new(RwLock::new(DataGuard::NotLoaded(
570                cell_meta.out_point.clone(),
571            )))),
572        }
573    }
574
575    fn access<DL: CellDataProvider>(&self, data_loader: &DL) -> Result<Bytes, ScriptError> {
576        let guard = self
577            .0
578            .read()
579            .map_err(|_| ScriptError::Other("RwLock poisoned".into()))?
580            .to_owned();
581        match guard {
582            DataGuard::NotLoaded(out_point) => {
583                let data = data_loader
584                    .get_cell_data(&out_point)
585                    .ok_or(ScriptError::Other("cell data not found".into()))?;
586                let mut write_guard = self
587                    .0
588                    .write()
589                    .map_err(|_| ScriptError::Other("RwLock poisoned".into()))?;
590                *write_guard = DataGuard::Loaded(data.clone());
591                Ok(data)
592            }
593            DataGuard::Loaded(bytes) => Ok(bytes),
594        }
595    }
596}
597
598/// A tri-state enum for representing binary lookup results.
599#[derive(Debug, Clone)]
600pub enum Binaries {
601    /// A unique cell is found for the requested binary.
602    Unique(Byte32, usize, LazyData),
603    /// Multiple cells are found for the requested binary, but all
604    /// the cells contains the same content(hence binary lookup still
605    /// succeeds).
606    Duplicate(Byte32, usize, LazyData),
607    /// Multiple cells are found for the requested binary, and they
608    /// differ so there is no way to tell which binary shall be used.
609    Multiple,
610}
611
612impl Binaries {
613    fn new(data_hash: Byte32, dep_index: usize, data: LazyData) -> Self {
614        Self::Unique(data_hash, dep_index, data)
615    }
616
617    fn merge(&mut self, data_hash: &Byte32) {
618        match self {
619            Self::Unique(hash, dep_index, data) | Self::Duplicate(hash, dep_index, data) => {
620                if hash != data_hash {
621                    *self = Self::Multiple;
622                } else {
623                    *self = Self::Duplicate(hash.to_owned(), *dep_index, data.to_owned());
624                }
625            }
626            Self::Multiple => {}
627        }
628    }
629}
630
631/// Immutable context data at transaction level
632#[derive(Clone, Debug)]
633pub struct TxData<DL> {
634    /// ResolvedTransaction.
635    pub rtx: Arc<ResolvedTransaction>,
636
637    /// Passed & derived information.
638    pub info: Arc<TxInfo<DL>>,
639}
640
641/// Information that is either passed as the context of the transaction,
642/// or can be derived from the transaction.
643#[derive(Clone, Debug)]
644pub struct TxInfo<DL> {
645    /// Data loader.
646    pub data_loader: DL,
647    /// Chain consensus parameters
648    pub consensus: Arc<Consensus>,
649    /// Transaction verification environment
650    pub tx_env: Arc<TxVerifyEnv>,
651
652    /// Potential binaries in current transaction indexed by data hash
653    pub binaries_by_data_hash: HashMap<Byte32, (usize, LazyData)>,
654    /// Potential binaries in current transaction indexed by type script hash
655    pub binaries_by_type_hash: HashMap<Byte32, Binaries>,
656    /// Lock script groups, orders here are important
657    pub lock_groups: BTreeMap<Byte32, ScriptGroup>,
658    /// Type script groups, orders here are important
659    pub type_groups: BTreeMap<Byte32, ScriptGroup>,
660    /// Output cells in current transaction reorganized in CellMeta format
661    pub outputs: Vec<CellMeta>,
662}
663
impl<DL> TxData<DL>
where
    DL: CellDataProvider,
{
    /// Creates a new TxData structure.
    ///
    /// Besides storing the passed-in values, this pre-computes the derived
    /// data kept in `TxInfo`:
    /// * `outputs`: every output of the transaction wrapped as a `CellMeta`;
    /// * `binaries_by_data_hash` / `binaries_by_type_hash`: lookup tables
    ///   built from the resolved cell deps;
    /// * `lock_groups` / `type_groups`: script groups built from the resolved
    ///   inputs (lock and type scripts) and the outputs (type scripts only).
    ///
    /// # Panics
    ///
    /// Panics when `data_loader` cannot provide the data hash of a resolved
    /// cell dep.
    pub fn new(
        rtx: Arc<ResolvedTransaction>,
        data_loader: DL,
        consensus: Arc<Consensus>,
        tx_env: Arc<TxVerifyEnv>,
    ) -> Self {
        let tx_hash = rtx.transaction.hash();
        let resolved_cell_deps = &rtx.resolved_cell_deps;
        let resolved_inputs = &rtx.resolved_inputs;
        // Outputs have their data at hand, so both the data and its hash are
        // stored in memory on the CellMeta; no transaction info is attached.
        let outputs = rtx
            .transaction
            .outputs_with_data_iter()
            .enumerate()
            .map(|(index, (cell_output, data))| {
                let out_point = OutPoint::new_builder()
                    .tx_hash(tx_hash.clone())
                    .index(index.pack())
                    .build();
                let data_hash = CellOutput::calc_data_hash(&data);
                CellMeta {
                    cell_output,
                    out_point,
                    transaction_info: None,
                    data_bytes: data.len() as u64,
                    mem_cell_data: Some(data),
                    mem_cell_data_hash: Some(data_hash),
                }
            })
            .collect();

        let mut binaries_by_data_hash: HashMap<Byte32, (usize, LazyData)> = HashMap::default();
        let mut binaries_by_type_hash: HashMap<Byte32, Binaries> = HashMap::default();
        for (i, cell_meta) in resolved_cell_deps.iter().enumerate() {
            let data_hash = data_loader
                .load_cell_data_hash(cell_meta)
                .expect("cell data hash");
            let lazy = LazyData::from_cell_meta(cell_meta);
            // When several cell deps share a data hash, the entry of the last
            // one replaces the earlier ones.
            binaries_by_data_hash.insert(data_hash.to_owned(), (i, lazy.to_owned()));

            // Cell deps carrying a type script are additionally indexed by the
            // hash of that type script; repeated hashes are merged so that
            // conflicting contents can be detected later.
            if let Some(t) = &cell_meta.cell_output.type_().to_opt() {
                binaries_by_type_hash
                    .entry(t.calc_script_hash())
                    .and_modify(|bin| bin.merge(&data_hash))
                    .or_insert_with(|| Binaries::new(data_hash.to_owned(), i, lazy.to_owned()));
            }
        }

        let mut lock_groups = BTreeMap::default();
        let mut type_groups = BTreeMap::default();
        for (i, cell_meta) in resolved_inputs.iter().enumerate() {
            // here we are only pre-processing the data, verify method validates
            // each input has correct script setup.
            let output = &cell_meta.cell_output;
            let lock_group_entry = lock_groups
                .entry(output.calc_lock_hash())
                .or_insert_with(|| ScriptGroup::from_lock_script(&output.lock()));
            lock_group_entry.input_indices.push(i);
            if let Some(t) = &output.type_().to_opt() {
                let type_group_entry = type_groups
                    .entry(t.calc_script_hash())
                    .or_insert_with(|| ScriptGroup::from_type_script(t));
                type_group_entry.input_indices.push(i);
            }
        }
        // Outputs only contribute to type script groups; no lock script group
        // is created or extended for them.
        for (i, output) in rtx.transaction.outputs().into_iter().enumerate() {
            if let Some(t) = &output.type_().to_opt() {
                let type_group_entry = type_groups
                    .entry(t.calc_script_hash())
                    .or_insert_with(|| ScriptGroup::from_type_script(t));
                type_group_entry.output_indices.push(i);
            }
        }

        Self {
            rtx,
            info: Arc::new(TxInfo {
                data_loader,
                consensus,
                tx_env,
                binaries_by_data_hash,
                binaries_by_type_hash,
                lock_groups,
                type_groups,
                outputs,
            }),
        }
    }

    #[inline]
    /// Extracts the actual binary of `script` from the dep cells.
    ///
    /// Delegates to `TxInfo::extract_script`.
    pub fn extract_script(&self, script: &Script) -> Result<Bytes, ScriptError> {
        self.info.extract_script(script)
    }
}
763
764impl<DL> TxInfo<DL>
765where
766    DL: CellDataProvider,
767{
768    #[inline]
769    /// Extracts actual script binary either in dep cells.
770    pub fn extract_script(&self, script: &Script) -> Result<Bytes, ScriptError> {
771        let (lazy, _) = self.extract_script_and_dep_index(script)?;
772        lazy.access(&self.data_loader)
773    }
774}
775
impl<DL> TxData<DL> {
    #[inline]
    /// Calculates the hash of the wrapped transaction.
    pub fn tx_hash(&self) -> Byte32 {
        self.rtx.transaction.hash()
    }

    #[inline]
    /// Extracts the index of the script binary in dep cells.
    ///
    /// Delegates to `TxInfo::extract_referenced_dep_index`.
    pub fn extract_referenced_dep_index(&self, script: &Script) -> Result<usize, ScriptError> {
        self.info.extract_referenced_dep_index(script)
    }

    #[inline]
    /// Finds the script group of the given type by its script hash.
    ///
    /// Delegates to `TxInfo::find_script_group`; returns `None` when no such
    /// group exists.
    pub fn find_script_group(
        &self,
        script_group_type: ScriptGroupType,
        script_hash: &Byte32,
    ) -> Option<&ScriptGroup> {
        self.info.find_script_group(script_group_type, script_hash)
    }

    #[inline]
    /// Returns the version of the machine based on the script and the consensus rules.
    ///
    /// Delegates to `TxInfo::select_version`.
    pub fn select_version(&self, script: &Script) -> Result<ScriptVersion, ScriptError> {
        self.info.select_version(script)
    }

    #[inline]
    /// Returns all script groups, each paired with its script hash.
    pub fn groups(&self) -> impl Iterator<Item = (&'_ Byte32, &'_ ScriptGroup)> {
        self.info.groups()
    }

    #[inline]
    /// Returns all script groups, each paired with its group type and script hash.
    pub fn groups_with_type(
        &self,
    ) -> impl Iterator<Item = (ScriptGroupType, &'_ Byte32, &'_ ScriptGroup)> {
        self.info.groups_with_type()
    }
}
819
820impl<DL> TxInfo<DL> {
821    #[inline]
822    /// Extracts the index of the script binary in dep cells
823    pub fn extract_referenced_dep_index(&self, script: &Script) -> Result<usize, ScriptError> {
824        let (_, dep_index) = self.extract_script_and_dep_index(script)?;
825        Ok(*dep_index)
826    }
827
828    fn extract_script_and_dep_index(
829        &self,
830        script: &Script,
831    ) -> Result<(&LazyData, &usize), ScriptError> {
832        let script_hash_type = ScriptHashType::try_from(script.hash_type())
833            .map_err(|err| ScriptError::InvalidScriptHashType(err.to_string()))?;
834        match script_hash_type {
835            ScriptHashType::Data | ScriptHashType::Data1 | ScriptHashType::Data2 => {
836                if let Some((dep_index, lazy)) = self.binaries_by_data_hash.get(&script.code_hash())
837                {
838                    Ok((lazy, dep_index))
839                } else {
840                    Err(ScriptError::ScriptNotFound(script.code_hash()))
841                }
842            }
843            ScriptHashType::Type => {
844                if let Some(ref bin) = self.binaries_by_type_hash.get(&script.code_hash()) {
845                    match bin {
846                        Binaries::Unique(_, dep_index, lazy) => Ok((lazy, dep_index)),
847                        Binaries::Duplicate(_, dep_index, lazy) => Ok((lazy, dep_index)),
848                        Binaries::Multiple => Err(ScriptError::MultipleMatches),
849                    }
850                } else {
851                    Err(ScriptError::ScriptNotFound(script.code_hash()))
852                }
853            }
854        }
855    }
856
857    /// Finds the script group from cell deps.
858    pub fn find_script_group(
859        &self,
860        script_group_type: ScriptGroupType,
861        script_hash: &Byte32,
862    ) -> Option<&ScriptGroup> {
863        match script_group_type {
864            ScriptGroupType::Lock => self.lock_groups.get(script_hash),
865            ScriptGroupType::Type => self.type_groups.get(script_hash),
866        }
867    }
868
869    fn is_vm_version_1_and_syscalls_2_enabled(&self) -> bool {
870        // If the proposal window is allowed to prejudge on the vm version,
871        // it will cause proposal tx to start a new vm in the blocks before hardfork,
872        // destroying the assumption that the transaction execution only uses the old vm
873        // before hardfork, leading to unexpected network splits.
874        let epoch_number = self.tx_env.epoch_number_without_proposal_window();
875        let hardfork_switch = self.consensus.hardfork_switch();
876        hardfork_switch
877            .ckb2021
878            .is_vm_version_1_and_syscalls_2_enabled(epoch_number)
879    }
880
881    fn is_vm_version_2_and_syscalls_3_enabled(&self) -> bool {
882        // If the proposal window is allowed to prejudge on the vm version,
883        // it will cause proposal tx to start a new vm in the blocks before hardfork,
884        // destroying the assumption that the transaction execution only uses the old vm
885        // before hardfork, leading to unexpected network splits.
886        let epoch_number = self.tx_env.epoch_number_without_proposal_window();
887        let hardfork_switch = self.consensus.hardfork_switch();
888        hardfork_switch
889            .ckb2023
890            .is_vm_version_2_and_syscalls_3_enabled(epoch_number)
891    }
892
893    /// Returns the version of the machine based on the script and the consensus rules.
894    pub fn select_version(&self, script: &Script) -> Result<ScriptVersion, ScriptError> {
895        let is_vm_version_2_and_syscalls_3_enabled = self.is_vm_version_2_and_syscalls_3_enabled();
896        let is_vm_version_1_and_syscalls_2_enabled = self.is_vm_version_1_and_syscalls_2_enabled();
897        let script_hash_type = ScriptHashType::try_from(script.hash_type())
898            .map_err(|err| ScriptError::InvalidScriptHashType(err.to_string()))?;
899        match script_hash_type {
900            ScriptHashType::Data => Ok(ScriptVersion::V0),
901            ScriptHashType::Data1 => {
902                if is_vm_version_1_and_syscalls_2_enabled {
903                    Ok(ScriptVersion::V1)
904                } else {
905                    Err(ScriptError::InvalidVmVersion(1))
906                }
907            }
908            ScriptHashType::Data2 => {
909                if is_vm_version_2_and_syscalls_3_enabled {
910                    Ok(ScriptVersion::V2)
911                } else {
912                    Err(ScriptError::InvalidVmVersion(2))
913                }
914            }
915            ScriptHashType::Type => {
916                if is_vm_version_2_and_syscalls_3_enabled {
917                    Ok(ScriptVersion::V2)
918                } else if is_vm_version_1_and_syscalls_2_enabled {
919                    Ok(ScriptVersion::V1)
920                } else {
921                    Ok(ScriptVersion::V0)
922                }
923            }
924        }
925    }
926
927    /// Returns all script groups.
928    pub fn groups(&self) -> impl Iterator<Item = (&'_ Byte32, &'_ ScriptGroup)> {
929        self.lock_groups.iter().chain(self.type_groups.iter())
930    }
931
932    /// Returns all script groups with type.
933    pub fn groups_with_type(
934        &self,
935    ) -> impl Iterator<Item = (ScriptGroupType, &'_ Byte32, &'_ ScriptGroup)> {
936        self.lock_groups
937            .iter()
938            .map(|(hash, group)| (ScriptGroupType::Lock, hash, group))
939            .chain(
940                self.type_groups
941                    .iter()
942                    .map(|(hash, group)| (ScriptGroupType::Type, hash, group)),
943            )
944    }
945}
946
/// Immutable context data at script group level.
///
/// Every member is held behind an [`Arc`], so cloning this structure clones
/// the `Arc` handles rather than the data they point to.
#[derive(Clone, Debug)]
pub struct SgData<DL> {
    /// The resolved transaction being verified.
    pub rtx: Arc<ResolvedTransaction>,

    /// Passed & derived information at transaction level.
    pub tx_info: Arc<TxInfo<DL>>,

    /// Passed & derived information at script group level.
    pub sg_info: Arc<SgInfo>,
}
959
/// Script group level derived information.
#[derive(Clone, Debug)]
pub struct SgInfo {
    /// Version under which the currently executed script runs.
    pub script_version: ScriptVersion,
    /// The script group that is currently being executed.
    pub script_group: ScriptGroup,
    /// Hash of the currently executed script.
    pub script_hash: Byte32,
    /// `DataPieceId` locating the root program. `SgData::new` sets this to
    /// the cell dep referenced by the group's script.
    pub program_data_piece_id: DataPieceId,
}
972
973impl<DL> SgData<DL> {
974    /// Creates a new SgData structure from TxData, and script group information
975    pub fn new(tx_data: &TxData<DL>, script_group: &ScriptGroup) -> Result<Self, ScriptError> {
976        let script_hash = script_group.script.calc_script_hash();
977        let script_version = tx_data.select_version(&script_group.script)?;
978        let dep_index = tx_data
979            .extract_referenced_dep_index(&script_group.script)?
980            .try_into()
981            .map_err(|_| ScriptError::Other("u32 overflow".to_string()))?;
982        Ok(Self {
983            rtx: Arc::clone(&tx_data.rtx),
984            tx_info: Arc::clone(&tx_data.info),
985            sg_info: Arc::new(SgInfo {
986                script_version,
987                script_hash,
988                script_group: script_group.clone(),
989                program_data_piece_id: DataPieceId::CellDep(dep_index),
990            }),
991        })
992    }
993
994    /// Shortcut to data loader
995    pub fn data_loader(&self) -> &DL {
996        &self.tx_info.data_loader
997    }
998
999    /// Shortcut to group input indices
1000    pub fn group_inputs(&self) -> &[usize] {
1001        &self.sg_info.script_group.input_indices
1002    }
1003
1004    /// Shortcut to group output indices
1005    pub fn group_outputs(&self) -> &[usize] {
1006        &self.sg_info.script_group.output_indices
1007    }
1008
1009    /// Shortcut to all outputs
1010    pub fn outputs(&self) -> &[CellMeta] {
1011        &self.tx_info.outputs
1012    }
1013}
1014
1015impl<DL> DataSource<DataPieceId> for SgData<DL>
1016where
1017    DL: CellDataProvider,
1018{
1019    fn load_data(&self, id: &DataPieceId, offset: u64, length: u64) -> Option<(Bytes, u64)> {
1020        match id {
1021            DataPieceId::Input(i) => self
1022                .rtx
1023                .resolved_inputs
1024                .get(*i as usize)
1025                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1026            DataPieceId::Output(i) => self
1027                .rtx
1028                .transaction
1029                .outputs_data()
1030                .get(*i as usize)
1031                .map(|data| data.raw_data()),
1032            DataPieceId::CellDep(i) => self
1033                .rtx
1034                .resolved_cell_deps
1035                .get(*i as usize)
1036                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1037            DataPieceId::GroupInput(i) => self
1038                .sg_info
1039                .script_group
1040                .input_indices
1041                .get(*i as usize)
1042                .and_then(|gi| self.rtx.resolved_inputs.get(*gi))
1043                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1044            DataPieceId::GroupOutput(i) => self
1045                .sg_info
1046                .script_group
1047                .output_indices
1048                .get(*i as usize)
1049                .and_then(|gi| self.rtx.transaction.outputs_data().get(*gi))
1050                .map(|data| data.raw_data()),
1051            DataPieceId::Witness(i) => self
1052                .rtx
1053                .transaction
1054                .witnesses()
1055                .get(*i as usize)
1056                .map(|data| data.raw_data()),
1057            DataPieceId::WitnessGroupInput(i) => self
1058                .sg_info
1059                .script_group
1060                .input_indices
1061                .get(*i as usize)
1062                .and_then(|gi| self.rtx.transaction.witnesses().get(*gi))
1063                .map(|data| data.raw_data()),
1064            DataPieceId::WitnessGroupOutput(i) => self
1065                .sg_info
1066                .script_group
1067                .output_indices
1068                .get(*i as usize)
1069                .and_then(|gi| self.rtx.transaction.witnesses().get(*gi))
1070                .map(|data| data.raw_data()),
1071        }
1072        .map(|data| {
1073            let offset = std::cmp::min(offset as usize, data.len());
1074            let full_length = data.len() - offset;
1075            let real_length = if length > 0 {
1076                std::cmp::min(full_length, length as usize)
1077            } else {
1078                full_length
1079            };
1080            (data.slice(offset..offset + real_length), full_length as u64)
1081        })
1082    }
1083}
1084
/// When the vm is initialized, arguments are loaded onto the stack.
/// This enum specifies how to locate these arguments.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum VmArgs {
    /// The arguments are read from another vm.
    Reader {
        /// Identifier of the virtual machine/process to read from.
        vm_id: u64,
        /// The number of arguments provided.
        argc: u64,
        /// Pointer to the actual arguments.
        // NOTE(review): presumably an address inside the memory of `vm_id` — confirm against the reader.
        argv: u64,
    },
    /// The arguments are supplied directly as a vector of byte strings.
    Vector(Vec<Bytes>),
}
1101
/// Mutable data at virtual machine level.
///
/// All members are `Arc` handles, so a clone of this context refers to the
/// same underlying values as the original.
#[derive(Clone)]
pub struct VmContext<DL>
where
    DL: CellDataProvider,
{
    /// Cycles executed before current VM starts
    pub base_cycles: Arc<AtomicU64>,
    /// Shared, mutex-protected handle to the scheduler's message box
    pub message_box: Arc<Mutex<Vec<Message>>>,
    /// A snapshot for COW usage
    pub snapshot2_context: Arc<Mutex<Snapshot2Context<DataPieceId, SgData<DL>>>>,
}
1115
1116impl<DL> VmContext<DL>
1117where
1118    DL: CellDataProvider + Clone,
1119{
1120    /// Creates a new VM context. It is by design that parameters to this function
1121    /// are references. It is a reminder that the inputs are designed to be shared
1122    /// among different entities.
1123    pub fn new(sg_data: &SgData<DL>, message_box: &Arc<Mutex<Vec<Message>>>) -> Self {
1124        Self {
1125            base_cycles: Arc::new(AtomicU64::new(0)),
1126            message_box: Arc::clone(message_box),
1127            snapshot2_context: Arc::new(Mutex::new(Snapshot2Context::new(sg_data.clone()))),
1128        }
1129    }
1130
1131    /// Sets current base cycles
1132    pub fn set_base_cycles(&mut self, base_cycles: u64) {
1133        self.base_cycles.store(base_cycles, Ordering::Release);
1134    }
1135}
1136
/// The scheduler's running mode.
#[derive(Clone)]
pub enum RunMode {
    /// Continues running until cycles are exhausted.
    // NOTE(review): the payload is presumably the cycle limit — confirm against the scheduler.
    LimitCycles(Cycle),
    /// Continues running until a Pause signal is received or cycles are exhausted.
    // NOTE(review): presumably (pause handle, cycle limit) — confirm against the scheduler.
    Pause(Pause, Cycle),
}
1145
/// Result reported once execution has terminated.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct TerminatedResult {
    /// Root VM exit code
    pub exit_code: i8,
    /// Total consumed cycles by all VMs in current scheduler,
    /// up to this execution point.
    pub consumed_cycles: Cycle,
}
1155
/// Result of a single scheduler iteration.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct IterationResult {
    /// VM ID that gets executed
    pub executed_vm: VmId,
    /// Terminated status
    // NOTE(review): presumably `None` while execution is still in progress — confirm against the scheduler.
    pub terminated_status: Option<TerminatedResult>,
}