// polkadot_node_core_pvf_common/execute.rs

// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

use crate::{error::InternalValidationError, ArtifactChecksum};
use codec::{Decode, Encode};
use polkadot_node_primitives::PoV;
use polkadot_parachain_primitives::primitives::ValidationResult;
use polkadot_primitives::{
	CandidateDescriptorVersion, CandidateReceiptV2 as CandidateReceipt, ExecutorParams, Hash,
	PersistedValidationData,
};
use std::{sync::Arc, time::Duration};

27/// Contains all context needed to validate a candidate.
28/// This reduces parameter explosion and keeps related data together.
29///
30/// Use this struct when passing validation data through the system. When sending
31/// to the execute worker, use [`ValidationContext::into_execute_request`] to extract
32/// only the data needed by the worker.
33#[derive(Clone, Debug, Encode, Decode)]
34pub struct ValidationContext {
35	/// The candidate receipt being validated
36	pub candidate_receipt: CandidateReceipt,
37	/// Persisted validation data
38	pub pvd: Arc<PersistedValidationData>,
39	/// Proof-of-validity
40	pub pov: Arc<PoV>,
41	/// Execution parameters
42	pub executor_params: ExecutorParams,
43	/// Execution timeout
44	pub exec_timeout: Duration,
45	/// Whether the `CandidateReceiptV3` node feature has ever been seen enabled.
46	///
47	/// During the V3 transition period, this flag determines whether to trust
48	/// `descriptor.version()` or fall back to `descriptor.version_old_rules()`
49	/// for approval/dispute validations.
50	/// See `CandidateDescriptorV2::version_for_candidate_validation`.
51	pub v3_seen: bool,
52}
53
54impl ValidationContext {
55	/// Get the relay parent hash from the candidate descriptor
56	pub fn relay_parent(&self) -> Hash {
57		self.candidate_receipt.descriptor.relay_parent()
58	}
59
60	/// Get the scheduling parent hash, using transition-safe logic.
61	// Note: This uses _for_candidate_validation which is the transition-safe version.
62	// It is used in both backing and approval/dispute contexts.
63	pub fn scheduling_parent(&self) -> Hash {
64		self.candidate_receipt
65			.descriptor
66			.scheduling_parent_for_candidate_validation(self.v3_seen)
67	}
68
69	/// Get the effective candidate descriptor version, using transition-safe logic.
70	pub fn descriptor_version(&self) -> CandidateDescriptorVersion {
71		self.candidate_receipt.descriptor.version_for_candidate_validation(self.v3_seen)
72	}
73
74	/// Convert to an ExecuteRequest for sending to the worker.
75	/// This extracts only the data needed by the execute worker process.
76	/// Consumes self since the context is no longer needed after sending to the worker.
77	pub fn into_execute_request(self, artifact_checksum: ArtifactChecksum) -> ExecuteRequest {
78		ExecuteRequest {
79			pvd: (*self.pvd).clone(),
80			pov: (*self.pov).clone(),
81			execution_timeout: self.exec_timeout,
82			artifact_checksum,
83			relay_parent: self.relay_parent(),
84			scheduling_parent: self.scheduling_parent(),
85			descriptor_version: self.descriptor_version(),
86		}
87	}
88}
89
90/// The payload of the one-time handshake that is done when a worker process is created. Carries
91/// data from the host to the worker.
92#[derive(Encode, Decode)]
93pub struct Handshake {
94	/// The executor parameters.
95	pub executor_params: ExecutorParams,
96}
97
98/// A request to execute a PVF in the worker process.
99///
100/// This is the IPC message sent from the validation host to the execute worker.
101/// It contains only the minimal data needed by the worker to perform validation:
102/// - PVD and PoV to construct ValidationParams for the PVF
103/// - Timeout for execution limits
104/// - Artifact checksum for corruption detection
105/// - Parent hashes for V3+ extension to ValidationParams
106/// - Descriptor version to determine which ValidationParams format to use
107///
108/// Note: This does NOT include the full candidate receipt or other host-side data
109/// that the worker doesn't need.
110#[derive(Encode, Decode)]
111pub struct ExecuteRequest {
112	/// Persisted validation data
113	pub pvd: PersistedValidationData,
114	/// Proof-of-validity
115	pub pov: PoV,
116	/// Execution timeout
117	pub execution_timeout: Duration,
118	/// Checksum of the artifact to execute
119	pub artifact_checksum: ArtifactChecksum,
120	/// The relay parent block hash (for V3+ ValidationParams extension)
121	pub relay_parent: Hash,
122	/// The scheduling parent block hash (for V3+ ValidationParams extension)
123	pub scheduling_parent: Hash,
124	/// The candidate descriptor version (determines ValidationParams format)
125	pub descriptor_version: CandidateDescriptorVersion,
126}
127
128/// The response from the execution worker.
129#[derive(Debug, Encode, Decode)]
130pub struct WorkerResponse {
131	/// The response from the execute job process.
132	pub job_response: JobResponse,
133	/// The amount of CPU time taken by the job.
134	pub duration: Duration,
135	/// The uncompressed PoV size.
136	pub pov_size: u32,
137}
138
139/// An error occurred in the worker process.
140#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
141pub enum WorkerError {
142	/// The job timed out.
143	#[error("The job timed out")]
144	JobTimedOut,
145	/// The job process has died. We must kill the worker just in case.
146	///
147	/// We cannot treat this as an internal error because malicious code may have killed the job.
148	/// We still retry it, because in the non-malicious case it is likely spurious.
149	#[error("The job process (pid {job_pid}) has died: {err}")]
150	JobDied { err: String, job_pid: i32 },
151	/// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
152	/// etc.
153	///
154	/// Because malicious code can cause a job error, we must not treat it as an internal error. We
155	/// still retry it, because in the non-malicious case it is likely spurious.
156	#[error("An unexpected error occurred in the job process: {0}")]
157	JobError(#[from] JobError),
158
159	/// Some internal error occurred.
160	#[error("An internal error occurred: {0}")]
161	InternalError(#[from] InternalValidationError),
162}
163
164/// The result of a job on the execution worker.
165pub type JobResult = Result<JobResponse, JobError>;
166
167/// The successful response from a job on the execution worker.
168#[derive(Debug, Encode, Decode)]
169pub enum JobResponse {
170	Ok {
171		/// The result of parachain validation.
172		result_descriptor: ValidationResult,
173	},
174	/// A possibly transient runtime instantiation error happened during the execution; may be
175	/// retried with re-preparation
176	RuntimeConstruction(String),
177	/// The candidate is invalid.
178	InvalidCandidate(String),
179	/// PoV decompression failed
180	PoVDecompressionFailure,
181	/// The artifact is corrupted, re-prepare the artifact and try again.
182	CorruptedArtifact,
183}
184
185impl JobResponse {
186	/// Creates an invalid response from a context `ctx` and a message `msg` (which can be empty).
187	pub fn format_invalid(ctx: &'static str, msg: &str) -> Self {
188		if msg.is_empty() {
189			Self::InvalidCandidate(ctx.to_string())
190		} else {
191			Self::InvalidCandidate(format!("{}: {}", ctx, msg))
192		}
193	}
194
195	/// Creates a may retry response from a context `ctx` and a message `msg` (which can be empty).
196	pub fn runtime_construction(ctx: &'static str, msg: &str) -> Self {
197		if msg.is_empty() {
198			Self::RuntimeConstruction(ctx.to_string())
199		} else {
200			Self::RuntimeConstruction(format!("{}: {}", ctx, msg))
201		}
202	}
203}
204
205/// An unexpected error occurred in the execution job process. Because this comes from the job,
206/// which executes untrusted code, this error must likewise be treated as untrusted. That is, we
207/// cannot raise an internal error based on this.
208#[derive(thiserror::Error, Clone, Debug, Encode, Decode)]
209pub enum JobError {
210	#[error("The job timed out")]
211	TimedOut,
212	#[error("An unexpected panic has occurred in the execution job: {0}")]
213	Panic(String),
214	/// Some error occurred when interfacing with the kernel.
215	#[error("Error interfacing with the kernel: {0}")]
216	Kernel(String),
217	#[error("Could not spawn the requested thread: {0}")]
218	CouldNotSpawnThread(String),
219	#[error("An error occurred in the CPU time monitor thread: {0}")]
220	CpuTimeMonitorThread(String),
221	/// Since the job can return any exit status it wants, we have to treat this as untrusted.
222	#[error("Unexpected exit status: {0}")]
223	UnexpectedExitStatus(i32),
224}