polkadot_node_core_pvf_common/execute.rs
1// Copyright (C) Parity Technologies (UK) Ltd.
2// This file is part of Polkadot.
3
4// Polkadot is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// Polkadot is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
16
17use crate::{error::InternalValidationError, ArtifactChecksum};
18use codec::{Decode, Encode};
19use polkadot_node_primitives::PoV;
20use polkadot_parachain_primitives::primitives::ValidationResult;
21use polkadot_primitives::{
22 CandidateDescriptorVersion, CandidateReceiptV2 as CandidateReceipt, ExecutorParams, Hash,
23 PersistedValidationData,
24};
25use std::{sync::Arc, time::Duration};
26
27/// Contains all context needed to validate a candidate.
28/// This reduces parameter explosion and keeps related data together.
29///
30/// Use this struct when passing validation data through the system. When sending
31/// to the execute worker, use [`ValidationContext::into_execute_request`] to extract
32/// only the data needed by the worker.
33#[derive(Clone, Debug, Encode, Decode)]
34pub struct ValidationContext {
35 /// The candidate receipt being validated
36 pub candidate_receipt: CandidateReceipt,
37 /// Persisted validation data
38 pub pvd: Arc<PersistedValidationData>,
39 /// Proof-of-validity
40 pub pov: Arc<PoV>,
41 /// Execution parameters
42 pub executor_params: ExecutorParams,
43 /// Execution timeout
44 pub exec_timeout: Duration,
45 /// Whether the `CandidateReceiptV3` node feature has ever been seen enabled.
46 ///
47 /// During the V3 transition period, this flag determines whether to trust
48 /// `descriptor.version()` or fall back to `descriptor.version_old_rules()`
49 /// for approval/dispute validations.
50 /// See `CandidateDescriptorV2::version_for_candidate_validation`.
51 pub v3_seen: bool,
52}
53
54impl ValidationContext {
55 /// Get the relay parent hash from the candidate descriptor
56 pub fn relay_parent(&self) -> Hash {
57 self.candidate_receipt.descriptor.relay_parent()
58 }
59
60 /// Get the scheduling parent hash, using transition-safe logic.
61 // Note: This uses _for_candidate_validation which is the transition-safe version.
62 // It is used in both backing and approval/dispute contexts.
63 pub fn scheduling_parent(&self) -> Hash {
64 self.candidate_receipt
65 .descriptor
66 .scheduling_parent_for_candidate_validation(self.v3_seen)
67 }
68
69 /// Get the effective candidate descriptor version, using transition-safe logic.
70 pub fn descriptor_version(&self) -> CandidateDescriptorVersion {
71 self.candidate_receipt.descriptor.version_for_candidate_validation(self.v3_seen)
72 }
73
74 /// Convert to an ExecuteRequest for sending to the worker.
75 /// This extracts only the data needed by the execute worker process.
76 /// Consumes self since the context is no longer needed after sending to the worker.
77 pub fn into_execute_request(self, artifact_checksum: ArtifactChecksum) -> ExecuteRequest {
78 ExecuteRequest {
79 pvd: (*self.pvd).clone(),
80 pov: (*self.pov).clone(),
81 execution_timeout: self.exec_timeout,
82 artifact_checksum,
83 relay_parent: self.relay_parent(),
84 scheduling_parent: self.scheduling_parent(),
85 descriptor_version: self.descriptor_version(),
86 }
87 }
88}
89
90/// The payload of the one-time handshake that is done when a worker process is created. Carries
91/// data from the host to the worker.
92#[derive(Encode, Decode)]
93pub struct Handshake {
94 /// The executor parameters.
95 pub executor_params: ExecutorParams,
96}
97
98/// A request to execute a PVF in the worker process.
99///
100/// This is the IPC message sent from the validation host to the execute worker.
101/// It contains only the minimal data needed by the worker to perform validation:
102/// - PVD and PoV to construct ValidationParams for the PVF
103/// - Timeout for execution limits
104/// - Artifact checksum for corruption detection
105/// - Parent hashes for V3+ extension to ValidationParams
106/// - Descriptor version to determine which ValidationParams format to use
107///
108/// Note: This does NOT include the full candidate receipt or other host-side data
109/// that the worker doesn't need.
110#[derive(Encode, Decode)]
111pub struct ExecuteRequest {
112 /// Persisted validation data
113 pub pvd: PersistedValidationData,
114 /// Proof-of-validity
115 pub pov: PoV,
116 /// Execution timeout
117 pub execution_timeout: Duration,
118 /// Checksum of the artifact to execute
119 pub artifact_checksum: ArtifactChecksum,
120 /// The relay parent block hash (for V3+ ValidationParams extension)
121 pub relay_parent: Hash,
122 /// The scheduling parent block hash (for V3+ ValidationParams extension)
123 pub scheduling_parent: Hash,
124 /// The candidate descriptor version (determines ValidationParams format)
125 pub descriptor_version: CandidateDescriptorVersion,
126}
127
128/// The response from the execution worker.
129#[derive(Debug, Encode, Decode)]
130pub struct WorkerResponse {
131 /// The response from the execute job process.
132 pub job_response: JobResponse,
133 /// The amount of CPU time taken by the job.
134 pub duration: Duration,
135 /// The uncompressed PoV size.
136 pub pov_size: u32,
137}
138
139/// An error occurred in the worker process.
140#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
141pub enum WorkerError {
142 /// The job timed out.
143 #[error("The job timed out")]
144 JobTimedOut,
145 /// The job process has died. We must kill the worker just in case.
146 ///
147 /// We cannot treat this as an internal error because malicious code may have killed the job.
148 /// We still retry it, because in the non-malicious case it is likely spurious.
149 #[error("The job process (pid {job_pid}) has died: {err}")]
150 JobDied { err: String, job_pid: i32 },
151 /// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
152 /// etc.
153 ///
154 /// Because malicious code can cause a job error, we must not treat it as an internal error. We
155 /// still retry it, because in the non-malicious case it is likely spurious.
156 #[error("An unexpected error occurred in the job process: {0}")]
157 JobError(#[from] JobError),
158
159 /// Some internal error occurred.
160 #[error("An internal error occurred: {0}")]
161 InternalError(#[from] InternalValidationError),
162}
163
164/// The result of a job on the execution worker.
165pub type JobResult = Result<JobResponse, JobError>;
166
167/// The successful response from a job on the execution worker.
168#[derive(Debug, Encode, Decode)]
169pub enum JobResponse {
170 Ok {
171 /// The result of parachain validation.
172 result_descriptor: ValidationResult,
173 },
174 /// A possibly transient runtime instantiation error happened during the execution; may be
175 /// retried with re-preparation
176 RuntimeConstruction(String),
177 /// The candidate is invalid.
178 InvalidCandidate(String),
179 /// PoV decompression failed
180 PoVDecompressionFailure,
181 /// The artifact is corrupted, re-prepare the artifact and try again.
182 CorruptedArtifact,
183}
184
185impl JobResponse {
186 /// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty).
187 pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
188 if msg.is_empty() {
189 Self::InvalidCandidate(ctx.to_string())
190 } else {
191 Self::InvalidCandidate(format!("{}: {}", ctx, msg))
192 }
193 }
194
195 /// Creates a may retry response from a context `ctx` and a message `msg` (which can be empty).
196 pub fn runtime_construction(ctx: &'static str, msg: &str) -> Self {
197 if msg.is_empty() {
198 Self::RuntimeConstruction(ctx.to_string())
199 } else {
200 Self::RuntimeConstruction(format!("{}: {}", ctx, msg))
201 }
202 }
203}
204
205/// An unexpected error occurred in the execution job process. Because this comes from the job,
206/// which executes untrusted code, this error must likewise be treated as untrusted. That is, we
207/// cannot raise an internal error based on this.
208#[derive(thiserror::Error, Clone, Debug, Encode, Decode)]
209pub enum JobError {
210 #[error("The job timed out")]
211 TimedOut,
212 #[error("An unexpected panic has occurred in the execution job: {0}")]
213 Panic(String),
214 /// Some error occurred when interfacing with the kernel.
215 #[error("Error interfacing with the kernel: {0}")]
216 Kernel(String),
217 #[error("Could not spawn the requested thread: {0}")]
218 CouldNotSpawnThread(String),
219 #[error("An error occurred in the CPU time monitor thread: {0}")]
220 CpuTimeMonitorThread(String),
221 /// Since the job can return any exit status it wants, we have to treat this as untrusted.
222 #[error("Unexpected exit status: {0}")]
223 UnexpectedExitStatus(i32),
224}