ckb_script/
types.rs

1//! Common type definitions for ckb-script package.
2
3use crate::{error::ScriptError, verify_env::TxVerifyEnv};
4use ckb_chain_spec::consensus::Consensus;
5use ckb_types::{
6    core::{
7        Cycle, ScriptHashType,
8        cell::{CellMeta, ResolvedTransaction},
9    },
10    packed::{Byte32, CellOutput, OutPoint, Script},
11    prelude::*,
12};
13use ckb_vm::{
14    ISA_B, ISA_IMC, ISA_MOP, Syscalls,
15    machine::{VERSION0, VERSION1, VERSION2},
16};
17use serde::{Deserialize, Serialize};
18use std::collections::{BTreeMap, HashMap};
19use std::fmt;
20use std::sync::{
21    Arc, Mutex, RwLock,
22    atomic::{AtomicU64, Ordering},
23};
24
25use ckb_traits::CellDataProvider;
26use ckb_vm::snapshot2::Snapshot2Context;
27
28use ckb_vm::{
29    DefaultMachineRunner, RISCV_GENERAL_REGISTER_NUMBER, SupportMachine,
30    bytes::Bytes,
31    machine::Pause,
32    snapshot2::{DataSource, Snapshot2},
33};
34use std::mem::size_of;
35
/// Integer type used to carry the CKB-VM ISA flag set.
pub type VmIsa = u8;
/// Integer type used to carry the CKB-VM version.
pub type VmVersion = u32;
40
41/// The default machine type when asm feature is enabled. Note that ckb-script now functions
42/// solely based on ckb_vm::DefaultMachineRunner trait. The type provided here is only for
43/// default implementations.
44#[cfg(has_asm)]
45pub type Machine = ckb_vm::machine::asm::AsmMachine;
46/// The default machine type when neither asm feature nor flatmemory feature is not enabled
47#[cfg(all(not(has_asm), not(feature = "flatmemory")))]
48pub type Machine = ckb_vm::TraceMachine<
49    ckb_vm::DefaultCoreMachine<u64, ckb_vm::WXorXMemory<ckb_vm::SparseMemory<u64>>>,
50>;
51/// The default machine type when asm feature is not enabled, but flatmemory is enabled
52#[cfg(all(not(has_asm), feature = "flatmemory"))]
53pub type Machine = ckb_vm::TraceMachine<
54    ckb_vm::DefaultCoreMachine<u64, ckb_vm::XorXMemory<ckb_vm::FlatMemory<u64>>>,
55>;
56
57/// Debug printer function type
58pub type DebugPrinter = Arc<dyn Fn(&Byte32, &str) + Send + Sync>;
59/// Syscall generator function type
60pub type SyscallGenerator<DL, V, M> =
61    fn(&VmId, &SgData<DL>, &VmContext<DL>, &V) -> Vec<Box<(dyn Syscalls<M>)>>;
62
/// Version of the CKB Script Verifier.
///
/// Variants carry explicit discriminants (0, 1, 2) and are ordered, so a
/// later version compares greater than an earlier one.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ScriptVersion {
    /// CKB VM 0, offering syscall version 1.
    V0 = 0,
    /// CKB VM 1, offering syscall versions 1 and 2.
    V1 = 1,
    /// CKB VM 2, offering syscall versions 1, 2 and 3.
    V2 = 2,
}
73
74impl ScriptVersion {
75    /// Returns the latest version.
76    pub const fn latest() -> Self {
77        Self::V2
78    }
79
80    /// Returns the ISA set of CKB VM in current script version.
81    pub fn vm_isa(self) -> VmIsa {
82        match self {
83            Self::V0 => ISA_IMC,
84            Self::V1 => ISA_IMC | ISA_B | ISA_MOP,
85            Self::V2 => ISA_IMC | ISA_B | ISA_MOP,
86        }
87    }
88
89    /// Returns the version of CKB VM in current script version.
90    pub fn vm_version(self) -> VmVersion {
91        match self {
92            Self::V0 => VERSION0,
93            Self::V1 => VERSION1,
94            Self::V2 => VERSION2,
95        }
96    }
97
98    /// Returns the specific data script hash type.
99    ///
100    /// Returns:
101    /// - `ScriptHashType::Data` for version 0;
102    /// - `ScriptHashType::Data1` for version 1;
103    pub fn data_hash_type(self) -> ScriptHashType {
104        match self {
105            Self::V0 => ScriptHashType::Data,
106            Self::V1 => ScriptHashType::Data1,
107            Self::V2 => ScriptHashType::Data2,
108        }
109    }
110
111    /// Creates a CKB VM core machine without cycles limit.
112    ///
113    /// In fact, there is still a limit of `max_cycles` which is set to `2^64-1`.
114    pub fn init_core_machine_without_limit(self) -> <Machine as DefaultMachineRunner>::Inner {
115        self.init_core_machine(u64::MAX)
116    }
117
118    /// Creates a CKB VM core machine.
119    pub fn init_core_machine(self, max_cycles: Cycle) -> <Machine as DefaultMachineRunner>::Inner {
120        let isa = self.vm_isa();
121        let version = self.vm_version();
122        <<Machine as DefaultMachineRunner>::Inner as SupportMachine>::new(isa, version, max_cycles)
123    }
124}
125
126/// A script group is defined as scripts that share the same hash.
127///
128/// A script group will only be executed once per transaction, the
129/// script itself should check against all inputs/outputs in its group
130/// if needed.
131#[derive(Clone, Debug)]
132pub struct ScriptGroup {
133    /// The script.
134    ///
135    /// A script group is a group of input and output cells that share the same script.
136    pub script: Script,
137    /// The script group type.
138    pub group_type: ScriptGroupType,
139    /// Indices of input cells.
140    pub input_indices: Vec<usize>,
141    /// Indices of output cells.
142    pub output_indices: Vec<usize>,
143}
144
145/// The methods included here are defected in a way: all construction
146/// methods here create ScriptGroup without any `input_indices` or
147/// `output_indices` filled. One has to manually fill them later(or forgot
148/// about this).
149/// As a result, we are marking them as crate-only methods for now. This
150/// forces users to one of the following 2 solutions:
151/// * Call `groups()` on `TxData` so they can fetch `ScriptGroup` data with
152///   all correct data filled.
153/// * Manually construct the struct where they have to think what shall be
154///   used for `input_indices` and `output_indices`.
155impl ScriptGroup {
156    /// Creates a new script group struct.
157    pub(crate) fn new(script: &Script, group_type: ScriptGroupType) -> Self {
158        Self {
159            group_type,
160            script: script.to_owned(),
161            input_indices: vec![],
162            output_indices: vec![],
163        }
164    }
165
166    /// Creates a lock script group.
167    pub(crate) fn from_lock_script(script: &Script) -> Self {
168        Self::new(script, ScriptGroupType::Lock)
169    }
170
171    /// Creates a type script group.
172    pub(crate) fn from_type_script(script: &Script) -> Self {
173        Self::new(script, ScriptGroupType::Type)
174    }
175}
176
177/// The script group type.
178///
179/// A cell can have a lock script and an optional type script. Even they reference the same script,
180/// lock script and type script will not be grouped together.
181#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
182#[serde(rename_all = "snake_case")]
183pub enum ScriptGroupType {
184    /// Lock script group.
185    Lock,
186    /// Type script group.
187    Type,
188}
189
190impl fmt::Display for ScriptGroupType {
191    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192        match self {
193            ScriptGroupType::Lock => write!(f, "Lock"),
194            ScriptGroupType::Type => write!(f, "Type"),
195        }
196    }
197}
198
199/// Struct specifies which script has verified so far.
200/// State is lifetime free, but capture snapshot need heavy memory copy
201#[derive(Clone)]
202pub struct TransactionState {
203    /// current suspended script index
204    pub current: usize,
205    /// vm scheduler suspend state
206    pub state: Option<FullSuspendedState>,
207    /// current consumed cycle
208    pub current_cycles: Cycle,
209    /// limit cycles
210    pub limit_cycles: Cycle,
211}
212
213impl TransactionState {
214    /// Creates a new TransactionState struct
215    pub fn new(
216        state: Option<FullSuspendedState>,
217        current: usize,
218        current_cycles: Cycle,
219        limit_cycles: Cycle,
220    ) -> Self {
221        TransactionState {
222            current,
223            state,
224            current_cycles,
225            limit_cycles,
226        }
227    }
228
229    /// Return next limit cycles according to max_cycles and step_cycles
230    pub fn next_limit_cycles(&self, step_cycles: Cycle, max_cycles: Cycle) -> (Cycle, bool) {
231        let remain = max_cycles - self.current_cycles;
232        let next_limit = self.limit_cycles + step_cycles;
233
234        if next_limit < remain {
235            (next_limit, false)
236        } else {
237            (remain, true)
238        }
239    }
240}
241
242/// Enum represent resumable verify result
243#[allow(clippy::large_enum_variant)]
244#[derive(Debug)]
245pub enum VerifyResult {
246    /// Completed total cycles
247    Completed(Cycle),
248    /// Suspended state
249    Suspended(TransactionState),
250}
251
252impl std::fmt::Debug for TransactionState {
253    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> std::fmt::Result {
254        f.debug_struct("TransactionState")
255            .field("current", &self.current)
256            .field("current_cycles", &self.current_cycles)
257            .field("limit_cycles", &self.limit_cycles)
258            .finish()
259    }
260}
261
/// Command used to steer the verification process: suspend, resume or stop it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ChunkCommand {
    /// Ask the verification process to suspend.
    Suspend,
    /// Ask the verification process to resume.
    Resume,
    /// Ask the verification process to stop.
    Stop,
}
272
/// Identifier type for VM instances.
pub type VmId = u64;
/// ID of the first VM booted, which is always 0.
pub const FIRST_VM_ID: VmId = 0;
277
/// A file descriptor, wrapping its numeric slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Fd(pub u64);

/// The first file descriptor slot handed out.
pub const FIRST_FD_SLOT: u64 = 2;

impl Fd {
    /// Creates the two fds of a new pipe starting at `slot`.
    ///
    /// Returns `(Fd(slot), Fd(slot + 1), slot + 2)`, the last item being the
    /// next available fd slot.
    pub fn create(slot: u64) -> (Fd, Fd, u64) {
        let first = Fd(slot);
        let second = Fd(slot + 1);
        let next_slot = slot + 2;
        (first, second, next_slot)
    }

    /// Returns the fd at the other end of the pipe, obtained by flipping the
    /// lowest bit of the slot number.
    pub fn other_fd(&self) -> Fd {
        let Fd(slot) = *self;
        Fd(slot ^ 1)
    }

    /// Tests whether this fd is the reading end of a pipe (even slot number).
    pub fn is_read(&self) -> bool {
        (self.0 & 1) == 0
    }

    /// Tests whether this fd is the writing end of a pipe (odd slot number).
    pub fn is_write(&self) -> bool {
        (self.0 & 1) == 1
    }
}
306
307/// VM is in waiting-to-read state.
308#[derive(Clone, Debug, PartialEq, Eq, Hash)]
309pub struct ReadState {
310    /// FD to read from
311    pub fd: Fd,
312    /// Length to read
313    pub length: u64,
314    /// VM address to read data into
315    pub buffer_addr: u64,
316    /// Length address to keep final read length
317    pub length_addr: u64,
318}
319
320/// VM is in waiting-to-write state.
321#[derive(Clone, Debug, PartialEq, Eq, Hash)]
322pub struct WriteState {
323    /// FD to write to
324    pub fd: Fd,
325    /// Bytes that have already been written
326    pub consumed: u64,
327    /// Length to write
328    pub length: u64,
329    /// VM address to write data from
330    pub buffer_addr: u64,
331    /// Length of address to keep final written length
332    pub length_addr: u64,
333}
334
335/// VM State.
336#[derive(Clone, Debug, PartialEq, Eq, Hash)]
337pub enum VmState {
338    /// Runnable.
339    Runnable,
340    /// Terminated.
341    Terminated,
342    /// Wait.
343    Wait {
344        /// Target vm id.
345        target_vm_id: VmId,
346        /// Exit code addr.
347        exit_code_addr: u64,
348    },
349    /// WaitForWrite.
350    WaitForWrite(WriteState),
351    /// WaitForRead.
352    WaitForRead(ReadState),
353}
354
355/// Used to specify the location of script data.
356#[derive(Clone, Debug)]
357pub struct DataLocation {
358    /// A pointer to the data.
359    pub data_piece_id: DataPieceId,
360    /// Data offset.
361    pub offset: u64,
362    /// Data length.
363    pub length: u64,
364}
365
366/// Arguments for exec syscall
367#[derive(Clone, Debug)]
368pub struct ExecV2Args {
369    /// Data location for the program to invoke
370    pub location: DataLocation,
371    /// Argc
372    pub argc: u64,
373    /// Argv
374    pub argv: u64,
375}
376
377/// Arguments for spawn syscall
378#[derive(Clone, Debug)]
379pub struct SpawnArgs {
380    /// Data location for the program to spawn
381    pub location: DataLocation,
382    /// Argc
383    pub argc: u64,
384    /// Argv
385    pub argv: u64,
386    /// File descriptors to pass to spawned child process
387    pub fds: Vec<Fd>,
388    /// VM address to keep pid for spawned child process
389    pub process_id_addr: u64,
390}
391
392/// Arguments for wait syscall
393#[derive(Clone, Debug)]
394pub struct WaitArgs {
395    /// VM ID to wait for termination
396    pub target_id: VmId,
397    /// VM address to keep exit code for the waited process
398    pub exit_code_addr: u64,
399}
400
/// Arguments of the pipe syscall.
#[derive(Debug, Clone)]
pub struct PipeArgs {
    /// VM address where the first created file descriptor is stored.
    pub fd1_addr: u64,
    /// VM address where the second created file descriptor is stored.
    pub fd2_addr: u64,
}
409
410/// Arguments shared by read, write and inherited fd syscalls
411#[derive(Clone, Debug)]
412pub struct FdArgs {
413    /// For each and write syscalls, this contains the file descriptor to use
414    pub fd: Fd,
415    /// Length to read or length to write for read/write syscalls. Inherited
416    /// fd syscall will ignore this field.
417    pub length: u64,
418    /// VM address to keep returned data
419    pub buffer_addr: u64,
420    /// VM address for a input / output length buffer.
421    /// For read / write syscalls, this contains the actual data length.
422    /// For inherited fd syscall, this contains the number of file descriptors.
423    pub length_addr: u64,
424}
425
426/// Inter-process message, this is now used for implementing syscalls, but might
427/// be expanded for more usages later.
428#[derive(Clone, Debug)]
429pub enum Message {
430    /// Exec syscall
431    ExecV2(VmId, ExecV2Args),
432    /// Spawn syscall
433    Spawn(VmId, SpawnArgs),
434    /// Wait syscall
435    Wait(VmId, WaitArgs),
436    /// Pipe syscall
437    Pipe(VmId, PipeArgs),
438    /// Read syscall
439    FdRead(VmId, FdArgs),
440    /// Write syscall
441    FdWrite(VmId, FdArgs),
442    /// Inherited FD syscall
443    InheritedFileDescriptor(VmId, FdArgs),
444    /// Close syscall
445    Close(VmId, Fd),
446}
447
/// Identifies a piece of data that belongs to the transaction.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DataPieceId {
    /// Data of the nth input cell.
    Input(u32),
    /// Data of the nth output.
    Output(u32),
    /// Data of the nth cell dep cell.
    CellDep(u32),
    /// Data of the nth group input cell.
    GroupInput(u32),
    /// Data of the nth group output.
    GroupOutput(u32),
    /// The nth witness.
    Witness(u32),
    /// The nth witness of the group inputs.
    WitnessGroupInput(u32),
    /// The nth witness of the group outputs.
    WitnessGroupOutput(u32),
}

impl TryFrom<(u64, u64, u64)> for DataPieceId {
    type Error = String;

    /// Converts a `(source, index, place)` triple into a `DataPieceId`.
    ///
    /// The index is narrowed to `u32` first, so an oversized index is
    /// reported before an unknown `(source, place)` combination. Place `0`
    /// selects cell data and place `1` selects witnesses; sources `1` and `2`
    /// both map to `Witness` when the place is `1`.
    fn try_from(value: (u64, u64, u64)) -> Result<Self, Self::Error> {
        let (source, raw_index, place) = value;
        let index = match u32::try_from(raw_index) {
            Ok(narrowed) => narrowed,
            Err(e) => return Err(format!("Error casting index to u32: {}", e)),
        };
        let piece = match (source, place) {
            (1, 0) => DataPieceId::Input(index),
            (2, 0) => DataPieceId::Output(index),
            (3, 0) => DataPieceId::CellDep(index),
            (0x0100000000000001, 0) => DataPieceId::GroupInput(index),
            (0x0100000000000002, 0) => DataPieceId::GroupOutput(index),
            (1, 1) | (2, 1) => DataPieceId::Witness(index),
            (0x0100000000000001, 1) => DataPieceId::WitnessGroupInput(index),
            (0x0100000000000002, 1) => DataPieceId::WitnessGroupOutput(index),
            _ => return Err(format!("Invalid source value: {:#x}", source)),
        };
        Ok(piece)
    }
}
490
491/// Full state representing all VM instances from verifying a CKB script.
492/// It should be serializable to binary formats, while also be able to
493/// fully recover the running environment with the full transaction environment.
494#[derive(Clone, Debug)]
495pub struct FullSuspendedState {
496    /// Total executed cycles
497    pub total_cycles: Cycle,
498    /// Next available VM ID
499    pub next_vm_id: VmId,
500    /// Next available file descriptor
501    pub next_fd_slot: u64,
502    /// Suspended VMs
503    pub vms: Vec<(VmId, VmState, Snapshot2<DataPieceId>)>,
504    /// Opened file descriptors with owners
505    pub fds: Vec<(Fd, VmId)>,
506    /// Inherited file descriptors for each spawned process
507    pub inherited_fd: Vec<(VmId, Vec<Fd>)>,
508    /// Terminated VMs with exit codes
509    pub terminated_vms: Vec<(VmId, i8)>,
510    /// Currently instantiated VMs. Upon resumption, those VMs will
511    /// be instantiated.
512    pub instantiated_ids: Vec<VmId>,
513}
514
515impl FullSuspendedState {
516    /// Calculates the size of current suspended state, should be used
517    /// to derive cycles charged for suspending / resuming.
518    pub fn size(&self) -> u64 {
519        (size_of::<Cycle>()
520            + size_of::<VmId>()
521            + size_of::<u64>()
522            + self.vms.iter().fold(0, |mut acc, (_, _, snapshot)| {
523                acc += size_of::<VmId>() + size_of::<VmState>();
524                acc += snapshot.pages_from_source.len()
525                    * (size_of::<u64>()
526                        + size_of::<u8>()
527                        + size_of::<DataPieceId>()
528                        + size_of::<u64>()
529                        + size_of::<u64>());
530                for dirty_page in &snapshot.dirty_pages {
531                    acc += size_of::<u64>() + size_of::<u8>() + dirty_page.2.len();
532                }
533                acc += size_of::<u32>()
534                    + RISCV_GENERAL_REGISTER_NUMBER * size_of::<u64>()
535                    + size_of::<u64>()
536                    + size_of::<u64>()
537                    + size_of::<u64>();
538                acc
539            })
540            + (self.fds.len() * (size_of::<Fd>() + size_of::<VmId>()))) as u64
541            + (self.inherited_fd.len() * (size_of::<Fd>())) as u64
542            + (self.terminated_vms.len() * (size_of::<VmId>() + size_of::<i8>())) as u64
543            + (self.instantiated_ids.len() * size_of::<VmId>()) as u64
544    }
545}
546
547/// A cell that is either loaded, or not yet loaded.
548#[derive(Debug, PartialEq, Eq, Clone)]
549pub enum DataGuard {
550    /// Un-loaded out point
551    NotLoaded(OutPoint),
552    /// Loaded data
553    Loaded(Bytes),
554}
555
556/// LazyData wrapper make sure not-loaded data will be loaded only after one access
557#[derive(Debug, Clone)]
558pub struct LazyData(Arc<RwLock<DataGuard>>);
559
560impl LazyData {
561    fn from_cell_meta(cell_meta: &CellMeta) -> LazyData {
562        match &cell_meta.mem_cell_data {
563            Some(data) => LazyData(Arc::new(RwLock::new(DataGuard::Loaded(data.to_owned())))),
564            None => LazyData(Arc::new(RwLock::new(DataGuard::NotLoaded(
565                cell_meta.out_point.clone(),
566            )))),
567        }
568    }
569
570    fn access<DL: CellDataProvider>(&self, data_loader: &DL) -> Result<Bytes, ScriptError> {
571        let guard = self
572            .0
573            .read()
574            .map_err(|_| ScriptError::Other("RwLock poisoned".into()))?
575            .to_owned();
576        match guard {
577            DataGuard::NotLoaded(out_point) => {
578                let data = data_loader
579                    .get_cell_data(&out_point)
580                    .ok_or(ScriptError::Other("cell data not found".into()))?;
581                let mut write_guard = self
582                    .0
583                    .write()
584                    .map_err(|_| ScriptError::Other("RwLock poisoned".into()))?;
585                *write_guard = DataGuard::Loaded(data.clone());
586                Ok(data)
587            }
588            DataGuard::Loaded(bytes) => Ok(bytes),
589        }
590    }
591}
592
593/// A tri-state enum for representing binary lookup results.
594#[derive(Debug, Clone)]
595pub enum Binaries {
596    /// A unique cell is found for the requested binary.
597    Unique(Byte32, usize, LazyData),
598    /// Multiple cells are found for the requested binary, but all
599    /// the cells contains the same content(hence binary lookup still
600    /// succeeds).
601    Duplicate(Byte32, usize, LazyData),
602    /// Multiple cells are found for the requested binary, and they
603    /// differ so there is no way to tell which binary shall be used.
604    Multiple,
605}
606
607impl Binaries {
608    fn new(data_hash: Byte32, dep_index: usize, data: LazyData) -> Self {
609        Self::Unique(data_hash, dep_index, data)
610    }
611
612    fn merge(&mut self, data_hash: &Byte32) {
613        match self {
614            Self::Unique(hash, dep_index, data) | Self::Duplicate(hash, dep_index, data) => {
615                if hash != data_hash {
616                    *self = Self::Multiple;
617                } else {
618                    *self = Self::Duplicate(hash.to_owned(), *dep_index, data.to_owned());
619                }
620            }
621            Self::Multiple => {}
622        }
623    }
624}
625
626/// Immutable context data at transaction level
627#[derive(Clone, Debug)]
628pub struct TxData<DL> {
629    /// ResolvedTransaction.
630    pub rtx: Arc<ResolvedTransaction>,
631
632    /// Passed & derived information.
633    pub info: Arc<TxInfo<DL>>,
634}
635
636/// Information that is either passed as the context of the transaction,
637/// or can be derived from the transaction.
638#[derive(Clone, Debug)]
639pub struct TxInfo<DL> {
640    /// Data loader.
641    pub data_loader: DL,
642    /// Chain consensus parameters
643    pub consensus: Arc<Consensus>,
644    /// Transaction verification environment
645    pub tx_env: Arc<TxVerifyEnv>,
646
647    /// Potential binaries in current transaction indexed by data hash
648    pub binaries_by_data_hash: HashMap<Byte32, (usize, LazyData)>,
649    /// Potential binaries in current transaction indexed by type script hash
650    pub binaries_by_type_hash: HashMap<Byte32, Binaries>,
651    /// Lock script groups, orders here are important
652    pub lock_groups: BTreeMap<Byte32, ScriptGroup>,
653    /// Type script groups, orders here are important
654    pub type_groups: BTreeMap<Byte32, ScriptGroup>,
655    /// Output cells in current transaction reorganized in CellMeta format
656    pub outputs: Vec<CellMeta>,
657}
658
659impl<DL> TxData<DL>
660where
661    DL: CellDataProvider,
662{
663    /// Creates a new TxData structure
664    pub fn new(
665        rtx: Arc<ResolvedTransaction>,
666        data_loader: DL,
667        consensus: Arc<Consensus>,
668        tx_env: Arc<TxVerifyEnv>,
669    ) -> Self {
670        let tx_hash = rtx.transaction.hash();
671        let resolved_cell_deps = &rtx.resolved_cell_deps;
672        let resolved_inputs = &rtx.resolved_inputs;
673        let outputs = rtx
674            .transaction
675            .outputs_with_data_iter()
676            .enumerate()
677            .map(|(index, (cell_output, data))| {
678                let out_point = OutPoint::new_builder()
679                    .tx_hash(tx_hash.clone())
680                    .index(index.pack())
681                    .build();
682                let data_hash = CellOutput::calc_data_hash(&data);
683                CellMeta {
684                    cell_output,
685                    out_point,
686                    transaction_info: None,
687                    data_bytes: data.len() as u64,
688                    mem_cell_data: Some(data),
689                    mem_cell_data_hash: Some(data_hash),
690                }
691            })
692            .collect();
693
694        let mut binaries_by_data_hash: HashMap<Byte32, (usize, LazyData)> = HashMap::default();
695        let mut binaries_by_type_hash: HashMap<Byte32, Binaries> = HashMap::default();
696        for (i, cell_meta) in resolved_cell_deps.iter().enumerate() {
697            let data_hash = data_loader
698                .load_cell_data_hash(cell_meta)
699                .expect("cell data hash");
700            let lazy = LazyData::from_cell_meta(cell_meta);
701            binaries_by_data_hash.insert(data_hash.to_owned(), (i, lazy.to_owned()));
702
703            if let Some(t) = &cell_meta.cell_output.type_().to_opt() {
704                binaries_by_type_hash
705                    .entry(t.calc_script_hash())
706                    .and_modify(|bin| bin.merge(&data_hash))
707                    .or_insert_with(|| Binaries::new(data_hash.to_owned(), i, lazy.to_owned()));
708            }
709        }
710
711        let mut lock_groups = BTreeMap::default();
712        let mut type_groups = BTreeMap::default();
713        for (i, cell_meta) in resolved_inputs.iter().enumerate() {
714            // here we are only pre-processing the data, verify method validates
715            // each input has correct script setup.
716            let output = &cell_meta.cell_output;
717            let lock_group_entry = lock_groups
718                .entry(output.calc_lock_hash())
719                .or_insert_with(|| ScriptGroup::from_lock_script(&output.lock()));
720            lock_group_entry.input_indices.push(i);
721            if let Some(t) = &output.type_().to_opt() {
722                let type_group_entry = type_groups
723                    .entry(t.calc_script_hash())
724                    .or_insert_with(|| ScriptGroup::from_type_script(t));
725                type_group_entry.input_indices.push(i);
726            }
727        }
728        for (i, output) in rtx.transaction.outputs().into_iter().enumerate() {
729            if let Some(t) = &output.type_().to_opt() {
730                let type_group_entry = type_groups
731                    .entry(t.calc_script_hash())
732                    .or_insert_with(|| ScriptGroup::from_type_script(t));
733                type_group_entry.output_indices.push(i);
734            }
735        }
736
737        Self {
738            rtx,
739            info: Arc::new(TxInfo {
740                data_loader,
741                consensus,
742                tx_env,
743                binaries_by_data_hash,
744                binaries_by_type_hash,
745                lock_groups,
746                type_groups,
747                outputs,
748            }),
749        }
750    }
751
752    #[inline]
753    /// Extracts actual script binary either in dep cells.
754    pub fn extract_script(&self, script: &Script) -> Result<Bytes, ScriptError> {
755        self.info.extract_script(script)
756    }
757}
758
759impl<DL> TxInfo<DL>
760where
761    DL: CellDataProvider,
762{
763    #[inline]
764    /// Extracts actual script binary either in dep cells.
765    pub fn extract_script(&self, script: &Script) -> Result<Bytes, ScriptError> {
766        let (lazy, _) = self.extract_script_and_dep_index(script)?;
767        lazy.access(&self.data_loader)
768    }
769}
770
771impl<DL> TxData<DL> {
772    #[inline]
773    /// Calculates transaction hash
774    pub fn tx_hash(&self) -> Byte32 {
775        self.rtx.transaction.hash()
776    }
777
778    #[inline]
779    /// Extracts the index of the script binary in dep cells
780    pub fn extract_referenced_dep_index(&self, script: &Script) -> Result<usize, ScriptError> {
781        self.info.extract_referenced_dep_index(script)
782    }
783
784    #[inline]
785    /// Finds the script group from cell deps.
786    pub fn find_script_group(
787        &self,
788        script_group_type: ScriptGroupType,
789        script_hash: &Byte32,
790    ) -> Option<&ScriptGroup> {
791        self.info.find_script_group(script_group_type, script_hash)
792    }
793
794    #[inline]
795    /// Returns the version of the machine based on the script and the consensus rules.
796    pub fn select_version(&self, script: &Script) -> Result<ScriptVersion, ScriptError> {
797        self.info.select_version(script)
798    }
799
800    #[inline]
801    /// Returns all script groups.
802    pub fn groups(&self) -> impl Iterator<Item = (&'_ Byte32, &'_ ScriptGroup)> {
803        self.info.groups()
804    }
805
806    #[inline]
807    /// Returns all script groups with type.
808    pub fn groups_with_type(
809        &self,
810    ) -> impl Iterator<Item = (ScriptGroupType, &'_ Byte32, &'_ ScriptGroup)> {
811        self.info.groups_with_type()
812    }
813}
814
815impl<DL> TxInfo<DL> {
816    #[inline]
817    /// Extracts the index of the script binary in dep cells
818    pub fn extract_referenced_dep_index(&self, script: &Script) -> Result<usize, ScriptError> {
819        let (_, dep_index) = self.extract_script_and_dep_index(script)?;
820        Ok(*dep_index)
821    }
822
823    fn extract_script_and_dep_index(
824        &self,
825        script: &Script,
826    ) -> Result<(&LazyData, &usize), ScriptError> {
827        let script_hash_type = ScriptHashType::try_from(script.hash_type())
828            .map_err(|err| ScriptError::InvalidScriptHashType(err.to_string()))?;
829        match script_hash_type {
830            ScriptHashType::Data | ScriptHashType::Data1 | ScriptHashType::Data2 => {
831                if let Some((dep_index, lazy)) = self.binaries_by_data_hash.get(&script.code_hash())
832                {
833                    Ok((lazy, dep_index))
834                } else {
835                    Err(ScriptError::ScriptNotFound(script.code_hash()))
836                }
837            }
838            ScriptHashType::Type => {
839                if let Some(ref bin) = self.binaries_by_type_hash.get(&script.code_hash()) {
840                    match bin {
841                        Binaries::Unique(_, dep_index, lazy) => Ok((lazy, dep_index)),
842                        Binaries::Duplicate(_, dep_index, lazy) => Ok((lazy, dep_index)),
843                        Binaries::Multiple => Err(ScriptError::MultipleMatches),
844                    }
845                } else {
846                    Err(ScriptError::ScriptNotFound(script.code_hash()))
847                }
848            }
849        }
850    }
851
852    /// Finds the script group from cell deps.
853    pub fn find_script_group(
854        &self,
855        script_group_type: ScriptGroupType,
856        script_hash: &Byte32,
857    ) -> Option<&ScriptGroup> {
858        match script_group_type {
859            ScriptGroupType::Lock => self.lock_groups.get(script_hash),
860            ScriptGroupType::Type => self.type_groups.get(script_hash),
861        }
862    }
863
864    fn is_vm_version_1_and_syscalls_2_enabled(&self) -> bool {
865        // If the proposal window is allowed to prejudge on the vm version,
866        // it will cause proposal tx to start a new vm in the blocks before hardfork,
867        // destroying the assumption that the transaction execution only uses the old vm
868        // before hardfork, leading to unexpected network splits.
869        let epoch_number = self.tx_env.epoch_number_without_proposal_window();
870        let hardfork_switch = self.consensus.hardfork_switch();
871        hardfork_switch
872            .ckb2021
873            .is_vm_version_1_and_syscalls_2_enabled(epoch_number)
874    }
875
876    fn is_vm_version_2_and_syscalls_3_enabled(&self) -> bool {
877        // If the proposal window is allowed to prejudge on the vm version,
878        // it will cause proposal tx to start a new vm in the blocks before hardfork,
879        // destroying the assumption that the transaction execution only uses the old vm
880        // before hardfork, leading to unexpected network splits.
881        let epoch_number = self.tx_env.epoch_number_without_proposal_window();
882        let hardfork_switch = self.consensus.hardfork_switch();
883        hardfork_switch
884            .ckb2023
885            .is_vm_version_2_and_syscalls_3_enabled(epoch_number)
886    }
887
888    /// Returns the version of the machine based on the script and the consensus rules.
889    pub fn select_version(&self, script: &Script) -> Result<ScriptVersion, ScriptError> {
890        let is_vm_version_2_and_syscalls_3_enabled = self.is_vm_version_2_and_syscalls_3_enabled();
891        let is_vm_version_1_and_syscalls_2_enabled = self.is_vm_version_1_and_syscalls_2_enabled();
892        let script_hash_type = ScriptHashType::try_from(script.hash_type())
893            .map_err(|err| ScriptError::InvalidScriptHashType(err.to_string()))?;
894        match script_hash_type {
895            ScriptHashType::Data => Ok(ScriptVersion::V0),
896            ScriptHashType::Data1 => {
897                if is_vm_version_1_and_syscalls_2_enabled {
898                    Ok(ScriptVersion::V1)
899                } else {
900                    Err(ScriptError::InvalidVmVersion(1))
901                }
902            }
903            ScriptHashType::Data2 => {
904                if is_vm_version_2_and_syscalls_3_enabled {
905                    Ok(ScriptVersion::V2)
906                } else {
907                    Err(ScriptError::InvalidVmVersion(2))
908                }
909            }
910            ScriptHashType::Type => {
911                if is_vm_version_2_and_syscalls_3_enabled {
912                    Ok(ScriptVersion::V2)
913                } else if is_vm_version_1_and_syscalls_2_enabled {
914                    Ok(ScriptVersion::V1)
915                } else {
916                    Ok(ScriptVersion::V0)
917                }
918            }
919        }
920    }
921
922    /// Returns all script groups.
923    pub fn groups(&self) -> impl Iterator<Item = (&'_ Byte32, &'_ ScriptGroup)> {
924        self.lock_groups.iter().chain(self.type_groups.iter())
925    }
926
927    /// Returns all script groups with type.
928    pub fn groups_with_type(
929        &self,
930    ) -> impl Iterator<Item = (ScriptGroupType, &'_ Byte32, &'_ ScriptGroup)> {
931        self.lock_groups
932            .iter()
933            .map(|(hash, group)| (ScriptGroupType::Lock, hash, group))
934            .chain(
935                self.type_groups
936                    .iter()
937                    .map(|(hash, group)| (ScriptGroupType::Type, hash, group)),
938            )
939    }
940}
941
/// Immutable context data at script group level.
///
/// Bundles the resolved transaction, the transaction-level information and
/// the script-group-level information behind `Arc` handles, so cloning an
/// `SgData` only clones the handles rather than the underlying data.
#[derive(Clone, Debug)]
pub struct SgData<DL> {
    /// Shared handle to the `ResolvedTransaction`.
    pub rtx: Arc<ResolvedTransaction>,

    /// Passed & derived information at transaction level.
    pub tx_info: Arc<TxInfo<DL>>,

    /// Passed & derived information at script group level.
    pub sg_info: Arc<SgInfo>,
}
954
/// Script group level derived information.
///
/// Describes the single script group that is currently being executed.
#[derive(Clone, Debug)]
pub struct SgInfo {
    /// Version of the currently executed script.
    pub script_version: ScriptVersion,
    /// The currently executed script group.
    pub script_group: ScriptGroup,
    /// Hash of the currently executed script.
    pub script_hash: Byte32,
    /// `DataPieceId` identifying where the root program is located.
    pub program_data_piece_id: DataPieceId,
}
967
968impl<DL> SgData<DL> {
969    /// Creates a new SgData structure from TxData, and script group information
970    pub fn new(tx_data: &TxData<DL>, script_group: &ScriptGroup) -> Result<Self, ScriptError> {
971        let script_hash = script_group.script.calc_script_hash();
972        let script_version = tx_data.select_version(&script_group.script)?;
973        let dep_index = tx_data
974            .extract_referenced_dep_index(&script_group.script)?
975            .try_into()
976            .map_err(|_| ScriptError::Other("u32 overflow".to_string()))?;
977        Ok(Self {
978            rtx: Arc::clone(&tx_data.rtx),
979            tx_info: Arc::clone(&tx_data.info),
980            sg_info: Arc::new(SgInfo {
981                script_version,
982                script_hash,
983                script_group: script_group.clone(),
984                program_data_piece_id: DataPieceId::CellDep(dep_index),
985            }),
986        })
987    }
988
989    /// Shortcut to data loader
990    pub fn data_loader(&self) -> &DL {
991        &self.tx_info.data_loader
992    }
993
994    /// Shortcut to group input indices
995    pub fn group_inputs(&self) -> &[usize] {
996        &self.sg_info.script_group.input_indices
997    }
998
999    /// Shortcut to group output indices
1000    pub fn group_outputs(&self) -> &[usize] {
1001        &self.sg_info.script_group.output_indices
1002    }
1003
1004    /// Shortcut to all outputs
1005    pub fn outputs(&self) -> &[CellMeta] {
1006        &self.tx_info.outputs
1007    }
1008}
1009
1010impl<DL> DataSource<DataPieceId> for SgData<DL>
1011where
1012    DL: CellDataProvider,
1013{
1014    fn load_data(&self, id: &DataPieceId, offset: u64, length: u64) -> Option<(Bytes, u64)> {
1015        match id {
1016            DataPieceId::Input(i) => self
1017                .rtx
1018                .resolved_inputs
1019                .get(*i as usize)
1020                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1021            DataPieceId::Output(i) => self
1022                .rtx
1023                .transaction
1024                .outputs_data()
1025                .get(*i as usize)
1026                .map(|data| data.raw_data()),
1027            DataPieceId::CellDep(i) => self
1028                .rtx
1029                .resolved_cell_deps
1030                .get(*i as usize)
1031                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1032            DataPieceId::GroupInput(i) => self
1033                .sg_info
1034                .script_group
1035                .input_indices
1036                .get(*i as usize)
1037                .and_then(|gi| self.rtx.resolved_inputs.get(*gi))
1038                .and_then(|cell| self.data_loader().load_cell_data(cell)),
1039            DataPieceId::GroupOutput(i) => self
1040                .sg_info
1041                .script_group
1042                .output_indices
1043                .get(*i as usize)
1044                .and_then(|gi| self.rtx.transaction.outputs_data().get(*gi))
1045                .map(|data| data.raw_data()),
1046            DataPieceId::Witness(i) => self
1047                .rtx
1048                .transaction
1049                .witnesses()
1050                .get(*i as usize)
1051                .map(|data| data.raw_data()),
1052            DataPieceId::WitnessGroupInput(i) => self
1053                .sg_info
1054                .script_group
1055                .input_indices
1056                .get(*i as usize)
1057                .and_then(|gi| self.rtx.transaction.witnesses().get(*gi))
1058                .map(|data| data.raw_data()),
1059            DataPieceId::WitnessGroupOutput(i) => self
1060                .sg_info
1061                .script_group
1062                .output_indices
1063                .get(*i as usize)
1064                .and_then(|gi| self.rtx.transaction.witnesses().get(*gi))
1065                .map(|data| data.raw_data()),
1066        }
1067        .map(|data| {
1068            let offset = std::cmp::min(offset as usize, data.len());
1069            let full_length = data.len() - offset;
1070            let real_length = if length > 0 {
1071                std::cmp::min(full_length, length as usize)
1072            } else {
1073                full_length
1074            };
1075            (data.slice(offset..offset + real_length), full_length as u64)
1076        })
1077    }
1078}
1079
/// When the vm is initialized, arguments are loaded onto the stack.
/// This enum specifies how to locate these arguments.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum VmArgs {
    /// The arguments are read from another vm.
    Reader {
        /// An identifier for the virtual machine/process to read from.
        vm_id: u64,
        /// The number of arguments provided.
        argc: u64,
        /// The pointer to the actual arguments.
        argv: u64,
    },
    /// The arguments are supplied directly as a vector of byte strings.
    Vector(Vec<Bytes>),
}
1096
/// Mutable data at virtual machine level.
///
/// Every field sits behind an `Arc`, so cloning a `VmContext` yields a
/// context that shares the same underlying state.
#[derive(Clone)]
pub struct VmContext<DL>
where
    DL: CellDataProvider,
{
    /// Cycles executed before the current VM starts.
    pub base_cycles: Arc<AtomicU64>,
    /// Shared, mutex-guarded handle to the scheduler's message box.
    pub message_box: Arc<Mutex<Vec<Message>>>,
    /// Shared, mutex-guarded snapshot context, for COW usage.
    pub snapshot2_context: Arc<Mutex<Snapshot2Context<DataPieceId, SgData<DL>>>>,
}
1110
1111impl<DL> VmContext<DL>
1112where
1113    DL: CellDataProvider + Clone,
1114{
1115    /// Creates a new VM context. It is by design that parameters to this function
1116    /// are references. It is a reminder that the inputs are designed to be shared
1117    /// among different entities.
1118    pub fn new(sg_data: &SgData<DL>, message_box: &Arc<Mutex<Vec<Message>>>) -> Self {
1119        Self {
1120            base_cycles: Arc::new(AtomicU64::new(0)),
1121            message_box: Arc::clone(message_box),
1122            snapshot2_context: Arc::new(Mutex::new(Snapshot2Context::new(sg_data.clone()))),
1123        }
1124    }
1125
1126    /// Sets current base cycles
1127    pub fn set_base_cycles(&mut self, base_cycles: u64) {
1128        self.base_cycles.store(base_cycles, Ordering::Release);
1129    }
1130}
1131
/// The scheduler's running mode.
#[derive(Clone)]
pub enum RunMode {
    /// Continues running until cycles are exhausted; carries the cycle limit.
    LimitCycles(Cycle),
    /// Continues running until a Pause signal is received; carries the
    /// `Pause` handle that delivers the signal.
    Pause(Pause),
}