Skip to main content

lean_rs_worker/
capability.rs

1//! Builder for worker-backed downstream capabilities.
2//!
3//! This module composes Lake target building, worker child resolution, worker
4//! startup, session opening, and optional metadata validation. It deliberately
5//! does not know downstream command names or row schemas.
6
7use std::env;
8use std::path::{Path, PathBuf};
9use std::process::Command;
10use std::time::Duration;
11
12use lean_rs::{LeanBuiltCapability, LeanCapabilityPreflight, LeanLoaderDiagnosticCode};
13use serde::Deserialize;
14use serde_json::Value;
15
16use crate::pool::{LeanWorkerRestartPolicyClass, LeanWorkerSessionKey};
17use crate::session::{
18    LeanWorkerCancellationToken, LeanWorkerProgressSink, LeanWorkerRuntimeMetadata, LeanWorkerSession,
19    LeanWorkerSessionConfig,
20};
21use crate::supervisor::{
22    LEAN_WORKER_REQUEST_TIMEOUT_LONG_RUNNING, LeanWorker, LeanWorkerConfig, LeanWorkerError, LeanWorkerRestartPolicy,
23};
24use crate::types::LeanWorkerCapabilityMetadata;
25
/// Name of the environment variable installed as the override on the default
/// worker-child locator; when the variable is set, its value is used as the
/// worker child path.
const WORKER_CHILD_ENV: &str = "LEAN_RS_WORKER_CHILD";
27
/// Builder for a worker-backed Lean capability session.
///
/// The builder hides the common setup sequence for downstream tools:
///
/// 1. build the Lake shared-library target with `lean-toolchain`;
/// 2. resolve and start the `lean-rs-worker-child` process;
/// 3. health-check the worker;
/// 4. open the configured host session once; and
/// 5. optionally validate downstream capability metadata.
///
/// Callers still provide the Lake project root, package name, library target,
/// and imports because those are the downstream capability's identity. Worker
/// framing, child lifecycle, path probing, timeouts, and restart policy stay
/// behind the builder.
#[derive(Clone, Debug)]
pub struct LeanWorkerCapabilityBuilder {
    /// Lake project root; used for the Lake build and the session configuration.
    project_root: PathBuf,
    /// Lake package name forwarded to the session configuration.
    package: String,
    /// Lake `lean_lib` target that is built (when needed) and loaded.
    lib_name: String,
    /// Imports forwarded to the session configuration.
    imports: Vec<String>,
    /// Dylib path taken from a built capability; when set, opening skips the Lake build.
    built_dylib_path: Option<PathBuf>,
    /// Built-capability descriptor kept so the static checks can run manifest preflight.
    built_capability: Option<LeanBuiltCapability>,
    /// Worker child locator; `None` falls back to the default locator.
    worker_child: Option<LeanWorkerChild>,
    /// Startup timeout override; applied to the worker config only when set.
    startup_timeout: Option<Duration>,
    /// Per-request timeout override; applied to the worker config only when set.
    request_timeout: Option<Duration>,
    /// Restart policy override; applied to the worker config only when set.
    restart_policy: Option<LeanWorkerRestartPolicy>,
    /// Optional metadata export to call (and optionally compare) after the session opens.
    metadata_check: Option<CapabilityMetadataCheck>,
}
56
57impl LeanWorkerCapabilityBuilder {
58    /// Create a builder for a Lake project and capability library.
59    ///
60    /// `project_root` is the directory containing `lakefile.lean`. `package`
61    /// is the Lake package name used by `lean-rs-host`, and `lib_name` is the
62    /// Lake `lean_lib` target to build and load.
63    #[must_use]
64    pub fn new(
65        project_root: impl Into<PathBuf>,
66        package: impl Into<String>,
67        lib_name: impl Into<String>,
68        imports: impl IntoIterator<Item = impl Into<String>>,
69    ) -> Self {
70        Self {
71            project_root: project_root.into(),
72            package: package.into(),
73            lib_name: lib_name.into(),
74            imports: imports.into_iter().map(Into::into).collect(),
75            built_dylib_path: None,
76            built_capability: None,
77            worker_child: None,
78            startup_timeout: None,
79            request_timeout: None,
80            restart_policy: None,
81            metadata_check: None,
82        }
83    }
84
85    /// Create a builder from a build-script produced capability.
86    ///
87    /// Manifest-backed descriptors are the canonical packaged-app path. The
88    /// builder reads package, module, and primary dylib facts from the
89    /// manifest, then infers the Lake project root from the standard
90    /// `.lake/build/lib/<dylib>` layout so the worker child can initialize
91    /// Lean's import search path. Direct dylib descriptors remain supported as
92    /// a compatibility path when callers also provide package and module names.
93    ///
94    /// # Errors
95    ///
96    /// Returns `LeanWorkerError` if manifest data cannot be parsed, the
97    /// fallback dylib path cannot be resolved, the compatibility descriptor is
98    /// missing package/module names, or the dylib is not under a standard Lake
99    /// build directory.
100    pub fn from_built_capability(
101        spec: &LeanBuiltCapability,
102        imports: impl IntoIterator<Item = impl Into<String>>,
103    ) -> Result<Self, LeanWorkerError> {
104        let artifact = WorkerCapabilityArtifact::from_built_capability(spec)?;
105        let project_root = infer_lake_project_root_from_dylib(&artifact.dylib_path)?;
106        Ok(Self {
107            project_root,
108            package: artifact.package,
109            lib_name: artifact.module,
110            imports: imports.into_iter().map(Into::into).collect(),
111            built_dylib_path: Some(artifact.dylib_path),
112            built_capability: Some(spec.clone()),
113            worker_child: None,
114            startup_timeout: None,
115            request_timeout: None,
116            restart_policy: None,
117            metadata_check: None,
118        })
119    }
120
121    /// Use an explicit `lean-rs-worker-child` executable.
122    ///
123    /// Tests and packaged applications should use this when the worker child
124    /// is not discoverable beside the current executable.
125    #[must_use]
126    pub fn worker_executable(mut self, path: impl Into<PathBuf>) -> Self {
127        self.worker_child = Some(LeanWorkerChild::path(path));
128        self
129    }
130
131    /// Resolve the worker executable with a packaged worker-child locator.
132    #[must_use]
133    pub fn worker_child(mut self, child: LeanWorkerChild) -> Self {
134        self.worker_child = Some(child);
135        self
136    }
137
138    /// Set the maximum time to wait for worker startup.
139    #[must_use]
140    pub fn startup_timeout(mut self, timeout: Duration) -> Self {
141        self.startup_timeout = Some(timeout);
142        self
143    }
144
145    /// Set the maximum time to wait for one worker request.
146    #[must_use]
147    pub fn request_timeout(mut self, timeout: Duration) -> Self {
148        self.request_timeout = Some(timeout);
149        self
150    }
151
152    /// Use the documented long-running request timeout profile.
153    #[must_use]
154    pub fn long_running_requests(mut self) -> Self {
155        self.request_timeout = Some(LEAN_WORKER_REQUEST_TIMEOUT_LONG_RUNNING);
156        self
157    }
158
159    /// Set the worker restart policy used after startup.
160    #[must_use]
161    pub fn restart_policy(mut self, policy: LeanWorkerRestartPolicy) -> Self {
162        self.restart_policy = Some(policy);
163        self
164    }
165
166    /// Validate generic capability metadata after the session opens.
167    ///
168    /// The export must have ABI `String -> IO String`, matching
169    /// `LeanWorkerSession::capability_metadata`. The returned metadata is
170    /// stored on the opened capability for callers that need it.
171    #[must_use]
172    pub fn validate_metadata(mut self, export: impl Into<String>, request: Value) -> Self {
173        self.metadata_check = Some(CapabilityMetadataCheck {
174            export: export.into(),
175            request,
176            expected: None,
177        });
178        self
179    }
180
181    /// Validate that a capability metadata export returns the expected facts.
182    ///
183    /// This is the pool-facing metadata expectation hook. The metadata remains
184    /// downstream-defined; `lean-rs-worker` only checks that the generic
185    /// metadata envelope matches the caller's requested expectation.
186    #[must_use]
187    pub fn expect_metadata(
188        mut self,
189        export: impl Into<String>,
190        request: Value,
191        expected: LeanWorkerCapabilityMetadata,
192    ) -> Self {
193        self.metadata_check = Some(CapabilityMetadataCheck {
194            export: export.into(),
195            request,
196            expected: Some(expected),
197        });
198        self
199    }
200
201    /// Return the session reuse key represented by this builder.
202    ///
203    /// The key is for worker-pool reuse only. It is not a downstream cache key
204    /// and does not encode row schemas, ranking, reporting, or source
205    /// provenance.
206    #[must_use]
207    pub fn session_key(&self) -> LeanWorkerSessionKey {
208        let restart_policy_class = match &self.restart_policy {
209            Some(policy) if policy == &LeanWorkerRestartPolicy::default() => LeanWorkerRestartPolicyClass::Default,
210            Some(_policy) => LeanWorkerRestartPolicyClass::Custom,
211            None => LeanWorkerRestartPolicyClass::Default,
212        };
213        let mut key = LeanWorkerSessionKey::new(
214            self.project_root.clone(),
215            self.package.clone(),
216            self.lib_name.clone(),
217            self.imports.clone(),
218        )
219        .restart_policy_class(restart_policy_class);
220        if let Some(check) = &self.metadata_check {
221            key = key.metadata_expectation(check.export.clone(), check.request.clone(), check.expected.clone());
222        }
223        key
224    }
225
226    pub(crate) fn pool_request_timeout(&self) -> Duration {
227        self.request_timeout
228            .unwrap_or(crate::supervisor::LEAN_WORKER_REQUEST_TIMEOUT_DEFAULT)
229    }
230
231    /// Check deployment facts before running a real worker command.
232    ///
233    /// The report validates the worker child locator, manifest-backed
234    /// capability artifact when present, worker protocol handshake, session
235    /// opening, and optional metadata expectation. It keeps child paths,
236    /// protocol frames, and loader environment details below the worker
237    /// boundary.
238    #[must_use]
239    pub fn check(&self) -> LeanWorkerBootstrapReport {
240        let mut checks = self.bootstrap_static_checks();
241        if checks.iter().any(LeanWorkerBootstrapCheck::is_error) {
242            return LeanWorkerBootstrapReport::new(checks);
243        }
244
245        match self.clone().open_unchecked() {
246            Ok(capability) => {
247                drop(capability.terminate());
248            }
249            Err(err) => checks.push(check_from_open_error(&err)),
250        }
251        LeanWorkerBootstrapReport::new(checks)
252    }
253
254    fn bootstrap_static_checks(&self) -> Vec<LeanWorkerBootstrapCheck> {
255        let mut checks = Vec::new();
256        match self
257            .worker_child
258            .as_ref()
259            .map_or_else(resolve_default_worker_executable, LeanWorkerChild::resolve)
260        {
261            Ok(path) => {
262                if let Err(err) = validate_worker_child_path(&path) {
263                    checks.push(check_from_open_error(&err));
264                }
265            }
266            Err(err) => checks.push(check_from_open_error(&err)),
267        }
268
269        if let Some(spec) = &self.built_capability
270            && spec.resolved_manifest_path().is_ok()
271        {
272            let report = LeanCapabilityPreflight::new(spec.clone()).check();
273            for check in report.errors() {
274                checks.push(LeanWorkerBootstrapCheck::error(
275                    LeanWorkerBootstrapDiagnosticCode::CapabilityPreflight { code: check.code() },
276                    check.subject().to_owned(),
277                    check.message().to_owned(),
278                    check.repair_hint().to_owned(),
279                ));
280            }
281        }
282        checks
283    }
284
285    /// Build the Lake target, start the worker, open the session, and return a ready capability.
286    ///
287    /// # Errors
288    ///
289    /// Returns `LeanWorkerError` if Lake cannot build the target, the worker
290    /// child cannot be resolved or spawned, the worker fails startup/health,
291    /// the session cannot open, or metadata validation fails.
292    pub fn open(self) -> Result<LeanWorkerCapability, LeanWorkerError> {
293        let report = self.bootstrap_static_report();
294        if let Some(check) = report.first_error() {
295            return Err(LeanWorkerError::Bootstrap {
296                code: check.code(),
297                message: check.message().to_owned(),
298            });
299        }
300        self.open_unchecked()
301    }
302
303    fn bootstrap_static_report(&self) -> LeanWorkerBootstrapReport {
304        LeanWorkerBootstrapReport::new(self.bootstrap_static_checks())
305    }
306
307    fn open_unchecked(self) -> Result<LeanWorkerCapability, LeanWorkerError> {
308        let dylib_path = match self.built_dylib_path {
309            Some(path) => path,
310            None => lean_toolchain::build_lake_target_quiet(&self.project_root, &self.lib_name)
311                .map_err(|diagnostic| LeanWorkerError::CapabilityBuild { diagnostic })?,
312        };
313        let worker_executable = self
314            .worker_child
315            .map_or_else(resolve_default_worker_executable, |child| child.resolve())?;
316        validate_worker_child_path(&worker_executable)?;
317
318        let mut config = LeanWorkerConfig::new(worker_executable);
319        if let Some(timeout) = self.startup_timeout {
320            config = config.startup_timeout(timeout);
321        }
322        if let Some(timeout) = self.request_timeout {
323            config = config.request_timeout(timeout);
324        }
325        if let Some(policy) = self.restart_policy {
326            config = config.restart_policy(policy);
327        }
328
329        let mut worker = LeanWorker::spawn(&config)?;
330        worker.health()?;
331
332        let session_config = LeanWorkerSessionConfig::new(
333            self.project_root.clone(),
334            self.package.clone(),
335            self.lib_name.clone(),
336            self.imports.clone(),
337        );
338
339        let validated_metadata = {
340            let mut session = worker.open_session(&session_config, None, None)?;
341            match self.metadata_check {
342                Some(check) => {
343                    let metadata = session.capability_metadata(&check.export, &check.request, None, None)?;
344                    if let Some(expected) = check.expected
345                        && metadata != expected
346                    {
347                        return Err(LeanWorkerError::CapabilityMetadataMismatch {
348                            export: check.export,
349                            expected: Box::new(expected),
350                            actual: Box::new(metadata),
351                        });
352                    }
353                    Some(metadata)
354                }
355                None => None,
356            }
357        };
358
359        Ok(LeanWorkerCapability {
360            worker,
361            session_config,
362            dylib_path,
363            validated_metadata,
364        })
365    }
366}
367
/// Stable worker bootstrap diagnostic codes.
///
/// Each variant maps to a stable string identifier through `as_str`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum LeanWorkerBootstrapDiagnosticCode {
    /// The worker child locator did not resolve to a file.
    WorkerChildUnresolved,
    /// The worker child exists but is not executable.
    WorkerChildNotExecutable,
    /// Manifest-backed capability preflight reported a loader/artifact issue.
    ///
    /// `code` is the loader diagnostic code carried over from the preflight finding.
    CapabilityPreflight { code: LeanLoaderDiagnosticCode },
    /// The worker child did not complete the protocol handshake.
    WorkerHandshakeFailed,
    /// Capability metadata did not match the caller's expectation.
    CapabilityMetadataMismatch,
    /// Worker bootstrap failed for a reason outside the named deployment checks.
    WorkerStartupFailed,
}
384
385impl LeanWorkerBootstrapDiagnosticCode {
386    /// Stable string identifier suitable for logs and support reports.
387    #[must_use]
388    pub const fn as_str(self) -> &'static str {
389        match self {
390            Self::WorkerChildUnresolved => "lean_rs.worker.bootstrap.child_unresolved",
391            Self::WorkerChildNotExecutable => "lean_rs.worker.bootstrap.child_not_executable",
392            Self::CapabilityPreflight { code } => code.as_str(),
393            Self::WorkerHandshakeFailed => "lean_rs.worker.bootstrap.handshake_failed",
394            Self::CapabilityMetadataMismatch => "lean_rs.worker.bootstrap.metadata_mismatch",
395            Self::WorkerStartupFailed => "lean_rs.worker.bootstrap.startup_failed",
396        }
397    }
398}
399
400impl std::fmt::Display for LeanWorkerBootstrapDiagnosticCode {
401    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
402        f.write_str(self.as_str())
403    }
404}
405
/// Severity of one worker bootstrap finding.
///
/// Only `Error` findings are treated as blocking by the bootstrap check and
/// report helpers in this module.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum LeanWorkerBootstrapSeverity {
    /// Informational finding that does not block startup.
    Info,
    /// Suspicious state that may still start.
    Warning,
    /// The worker should not start real commands until this is fixed.
    Error,
}
416
/// One bounded worker bootstrap finding.
///
/// The text fields are passed through `bound_bootstrap_text` by the
/// constructor before being stored.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LeanWorkerBootstrapCheck {
    /// Stable diagnostic code identifying the kind of finding.
    code: LeanWorkerBootstrapDiagnosticCode,
    /// How serious the finding is; `Error` blocks startup.
    severity: LeanWorkerBootstrapSeverity,
    /// Child binary, artifact, export, or protocol step the finding concerns.
    subject: String,
    /// Explanation of the finding.
    message: String,
    /// Suggested repair for the finding.
    repair_hint: String,
}
426
427impl LeanWorkerBootstrapCheck {
428    fn error(
429        code: LeanWorkerBootstrapDiagnosticCode,
430        subject: impl Into<String>,
431        message: impl Into<String>,
432        repair_hint: impl Into<String>,
433    ) -> Self {
434        Self {
435            code,
436            severity: LeanWorkerBootstrapSeverity::Error,
437            subject: bound_bootstrap_text(subject.into()),
438            message: bound_bootstrap_text(message.into()),
439            repair_hint: bound_bootstrap_text(repair_hint.into()),
440        }
441    }
442
443    /// Stable diagnostic code.
444    #[must_use]
445    pub fn code(&self) -> LeanWorkerBootstrapDiagnosticCode {
446        self.code
447    }
448
449    /// Whether this finding blocks worker startup.
450    #[must_use]
451    pub fn severity(&self) -> LeanWorkerBootstrapSeverity {
452        self.severity
453    }
454
455    /// Child binary, artifact, export, or protocol step this finding concerns.
456    #[must_use]
457    pub fn subject(&self) -> &str {
458        &self.subject
459    }
460
461    /// Bounded explanation of the finding.
462    #[must_use]
463    pub fn message(&self) -> &str {
464        &self.message
465    }
466
467    /// Bounded repair hint for packaged applications.
468    #[must_use]
469    pub fn repair_hint(&self) -> &str {
470        &self.repair_hint
471    }
472
473    fn is_error(&self) -> bool {
474        self.severity == LeanWorkerBootstrapSeverity::Error
475    }
476}
477
/// Structured result of worker bootstrap checks for one capability builder.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LeanWorkerBootstrapReport {
    /// Findings in the order they were collected.
    checks: Vec<LeanWorkerBootstrapCheck>,
}
483
484impl LeanWorkerBootstrapReport {
485    fn new(checks: Vec<LeanWorkerBootstrapCheck>) -> Self {
486        Self { checks }
487    }
488
489    /// All bootstrap findings.
490    #[must_use]
491    pub fn checks(&self) -> &[LeanWorkerBootstrapCheck] {
492        &self.checks
493    }
494
495    /// Blocking bootstrap findings.
496    pub fn errors(&self) -> impl Iterator<Item = &LeanWorkerBootstrapCheck> {
497        self.checks
498            .iter()
499            .filter(|check| check.severity == LeanWorkerBootstrapSeverity::Error)
500    }
501
502    /// Whether the worker bootstrap checks found no blocking findings.
503    #[must_use]
504    pub fn is_ok(&self) -> bool {
505        self.first_error().is_none()
506    }
507
508    /// First blocking finding, if any.
509    #[must_use]
510    pub fn first_error(&self) -> Option<&LeanWorkerBootstrapCheck> {
511        self.errors().next()
512    }
513}
514
/// A worker-backed capability with its Lake target built and worker started.
///
/// The value owns the worker supervisor and the session configuration. It is
/// the normal entry point for downstream capability use until the typed command
/// facade lands on top of it.
#[derive(Debug)]
pub struct LeanWorkerCapability {
    /// Supervisor for the spawned worker child.
    worker: LeanWorker,
    /// Session configuration assembled by the builder; reused for every session.
    session_config: LeanWorkerSessionConfig,
    /// Capability dylib path: built by Lake or taken from the built capability.
    dylib_path: PathBuf,
    /// Metadata returned by the builder's metadata check, when one was configured.
    validated_metadata: Option<LeanWorkerCapabilityMetadata>,
}
527
528impl LeanWorkerCapability {
529    /// Open a worker session for this capability.
530    ///
531    /// The builder has already proved that the session can open. This method
532    /// is still fallible because worker cycling, cancellation, or a child
533    /// failure may require a fresh session.
534    ///
535    /// # Errors
536    ///
537    /// Returns `LeanWorkerError` if the worker is dead, the child cannot open
538    /// the configured imports, cancellation is already requested, a progress
539    /// sink panics, or protocol communication fails.
540    pub fn open_session(
541        &mut self,
542        cancellation: Option<&LeanWorkerCancellationToken>,
543        progress: Option<&dyn LeanWorkerProgressSink>,
544    ) -> Result<LeanWorkerSession<'_>, LeanWorkerError> {
545        self.worker.open_session(&self.session_config, cancellation, progress)
546    }
547
548    /// Open a worker session with a caller-supplied import set, overriding the imports
549    /// the builder was constructed with. The capability's `project_root` / `package` /
550    /// `lib_name` are unchanged.
551    ///
552    /// Lifecycle is identical to [`open_session`](Self::open_session): the returned
553    /// session borrows from `&mut self` and dies when dropped.
554    ///
555    /// # Errors
556    ///
557    /// Same as [`open_session`](Self::open_session).
558    pub fn open_session_with_imports(
559        &mut self,
560        imports: impl IntoIterator<Item = impl Into<String>>,
561        cancellation: Option<&LeanWorkerCancellationToken>,
562        progress: Option<&dyn LeanWorkerProgressSink>,
563    ) -> Result<LeanWorkerSession<'_>, LeanWorkerError> {
564        let config = LeanWorkerSessionConfig::new(
565            self.session_config.project_root_string(),
566            self.session_config.package().to_owned(),
567            self.session_config.lib_name().to_owned(),
568            imports,
569        );
570        self.worker.open_session(&config, cancellation, progress)
571    }
572
573    /// Return the built capability dylib path resolved by `lean-toolchain`.
574    #[must_use]
575    pub fn dylib_path(&self) -> &Path {
576        &self.dylib_path
577    }
578
579    /// Return the session configuration used by this capability.
580    #[must_use]
581    pub fn session_config(&self) -> &LeanWorkerSessionConfig {
582        &self.session_config
583    }
584
585    /// Return capability metadata validated by the builder, if requested.
586    #[must_use]
587    pub fn validated_metadata(&self) -> Option<&LeanWorkerCapabilityMetadata> {
588        self.validated_metadata.as_ref()
589    }
590
591    /// Return protocol/runtime facts captured from the worker handshake.
592    #[must_use]
593    pub fn runtime_metadata(&self) -> LeanWorkerRuntimeMetadata {
594        self.worker.runtime_metadata()
595    }
596
597    /// Borrow the underlying worker for lifecycle operations such as cycling.
598    #[must_use]
599    pub fn worker(&self) -> &LeanWorker {
600        &self.worker
601    }
602
603    /// Mutably borrow the underlying worker for lifecycle operations such as cycling.
604    #[must_use]
605    pub fn worker_mut(&mut self) -> &mut LeanWorker {
606        &mut self.worker
607    }
608
609    /// Terminate the worker child and return its exit status.
610    ///
611    /// # Errors
612    ///
613    /// Returns `LeanWorkerError` if the worker is already dead, the terminate
614    /// request fails, or waiting for the child fails.
615    pub fn terminate(self) -> Result<crate::supervisor::LeanWorkerExit, LeanWorkerError> {
616        self.worker.terminate()
617    }
618}
619
/// Metadata call the builder performs after the session opens.
#[derive(Clone, Debug)]
struct CapabilityMetadataCheck {
    /// Name of the metadata export to call.
    export: String,
    /// JSON request passed to the metadata export.
    request: Value,
    /// Expected metadata; `None` means the result is recorded without comparison.
    expected: Option<LeanWorkerCapabilityMetadata>,
}
626
/// Identity facts extracted from a built capability: where its dylib lives and
/// which Lake package / root module it belongs to.
#[derive(Debug)]
struct WorkerCapabilityArtifact {
    /// Primary dylib path (from the manifest or the direct descriptor).
    dylib_path: PathBuf,
    /// Lake package name.
    package: String,
    /// Root Lean module name; the builder uses it as the library target name.
    module: String,
}
633
634impl WorkerCapabilityArtifact {
635    fn from_built_capability(spec: &LeanBuiltCapability) -> Result<Self, LeanWorkerError> {
636        if let Ok(manifest_path) = spec.resolved_manifest_path() {
637            return Self::from_manifest(&manifest_path);
638        }
639
640        let dylib_path = spec.dylib_path().map_err(|err| LeanWorkerError::Setup {
641            message: err.to_string(),
642        })?;
643        let package = spec.package_name().ok_or_else(|| LeanWorkerError::Setup {
644            message: "LeanBuiltCapability is missing the Lake package name; call `.package(...)`".to_owned(),
645        })?;
646        let module = spec.module_name().ok_or_else(|| LeanWorkerError::Setup {
647            message: "LeanBuiltCapability is missing the root Lean module name; call `.module(...)`".to_owned(),
648        })?;
649        Ok(Self {
650            dylib_path,
651            package: package.to_owned(),
652            module: module.to_owned(),
653        })
654    }
655
656    fn from_manifest(manifest_path: &Path) -> Result<Self, LeanWorkerError> {
657        let bytes = std::fs::read(manifest_path).map_err(|err| LeanWorkerError::Bootstrap {
658            code: LeanWorkerBootstrapDiagnosticCode::CapabilityPreflight {
659                code: LeanLoaderDiagnosticCode::MissingManifest,
660            },
661            message: format!(
662                "could not read Lean capability manifest '{}': {err}",
663                manifest_path.display()
664            ),
665        })?;
666        let manifest: WorkerCapabilityManifest =
667            serde_json::from_slice(&bytes).map_err(|err| LeanWorkerError::Bootstrap {
668                code: LeanWorkerBootstrapDiagnosticCode::CapabilityPreflight {
669                    code: LeanLoaderDiagnosticCode::MalformedManifest,
670                },
671                message: format!(
672                    "Lean capability manifest '{}' is malformed: {err}",
673                    manifest_path.display()
674                ),
675            })?;
676        if manifest.schema_version != u64::from(lean_toolchain::CAPABILITY_MANIFEST_SCHEMA_VERSION) {
677            return Err(LeanWorkerError::Bootstrap {
678                code: LeanWorkerBootstrapDiagnosticCode::CapabilityPreflight {
679                    code: LeanLoaderDiagnosticCode::UnsupportedManifestSchema,
680                },
681                message: format!(
682                    "unsupported Lean capability manifest schema {}; supported schema is {}",
683                    manifest.schema_version,
684                    lean_toolchain::CAPABILITY_MANIFEST_SCHEMA_VERSION
685                ),
686            });
687        }
688        Ok(Self {
689            dylib_path: manifest.primary_dylib,
690            package: manifest.package,
691            module: manifest.module,
692        })
693    }
694}
695
/// The subset of the capability manifest JSON that the worker builder reads.
#[derive(Deserialize)]
struct WorkerCapabilityManifest {
    /// Manifest schema version; compared against the supported schema version.
    schema_version: u64,
    /// Path of the capability's primary dylib as recorded in the manifest.
    primary_dylib: PathBuf,
    /// Lake package name.
    package: String,
    /// Root Lean module name.
    module: String,
}
703
/// Locator for an app-owned worker child executable.
///
/// Dependency binaries are not automatically installed with downstream
/// applications. Production apps should ship a tiny binary that calls
/// [`crate::run_worker_child_stdio`] and point the capability builder at it
/// through this locator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LeanWorkerChild {
    /// Executable file name to look for in sibling locations, if any.
    executable_name: Option<String>,
    /// Explicit executable path, if any; returned as-is by resolution.
    explicit_path: Option<PathBuf>,
    /// Environment variable that, when set, overrides the other sources.
    env_var: Option<String>,
}
716
717impl LeanWorkerChild {
718    /// Locate a worker child beside the current executable, or beside the
719    /// Cargo profile directory during tests and `cargo run`.
720    #[must_use]
721    pub fn sibling(executable_name: impl Into<String>) -> Self {
722        Self {
723            executable_name: Some(with_exe_suffix(executable_name.into())),
724            explicit_path: None,
725            env_var: None,
726        }
727    }
728
729    /// Use an explicit worker child path.
730    #[must_use]
731    pub fn path(path: impl Into<PathBuf>) -> Self {
732        Self {
733            executable_name: None,
734            explicit_path: Some(path.into()),
735            env_var: None,
736        }
737    }
738
739    /// Add an environment-variable override for launchers and tests.
740    #[must_use]
741    pub fn env_override(mut self, env_var: impl Into<String>) -> Self {
742        self.env_var = Some(env_var.into());
743        self
744    }
745
746    fn resolve(&self) -> Result<PathBuf, LeanWorkerError> {
747        let mut tried = Vec::new();
748        if let Some(env_var) = &self.env_var
749            && let Some(value) = env::var_os(env_var)
750        {
751            let path = PathBuf::from(value);
752            if path.is_file() {
753                return Ok(path);
754            }
755            tried.push(path);
756            return Err(LeanWorkerError::WorkerChildUnresolved { tried });
757        }
758        if let Some(path) = &self.explicit_path {
759            return Ok(path.clone());
760        }
761
762        let executable_name = self
763            .executable_name
764            .clone()
765            .unwrap_or_else(|| with_exe_suffix("lean-rs-worker-child".to_owned()));
766        tried.extend(candidate_sibling_worker_paths(&executable_name));
767        if executable_name == with_exe_suffix("lean-rs-worker-child".to_owned())
768            && let Some(path) = try_build_workspace_worker_child(&executable_name, &mut tried)
769        {
770            return Ok(path);
771        }
772        for path in dedup_paths(&tried) {
773            if path.is_file() {
774                return Ok(path);
775            }
776        }
777        Err(LeanWorkerError::WorkerChildUnresolved { tried })
778    }
779}
780
781impl Default for LeanWorkerChild {
782    fn default() -> Self {
783        Self::sibling("lean-rs-worker-child").env_override(WORKER_CHILD_ENV)
784    }
785}
786
787fn resolve_default_worker_executable() -> Result<PathBuf, LeanWorkerError> {
788    LeanWorkerChild::default().resolve()
789}
790
791fn validate_worker_child_path(path: &Path) -> Result<(), LeanWorkerError> {
792    if !path.is_file() {
793        return Err(LeanWorkerError::WorkerChildNotExecutable {
794            path: path.to_path_buf(),
795            reason: "path does not point to a file".to_owned(),
796        });
797    }
798    if !is_executable_file(path) {
799        return Err(LeanWorkerError::WorkerChildNotExecutable {
800            path: path.to_path_buf(),
801            reason: "file is not executable by this user".to_owned(),
802        });
803    }
804    Ok(())
805}
806
/// Unix: report whether `path` has at least one execute permission bit set
/// (owner, group, or other). Returns `false` when the metadata cannot be read.
#[cfg(unix)]
fn is_executable_file(path: &Path) -> bool {
    use std::os::unix::fs::PermissionsExt as _;

    match std::fs::metadata(path) {
        Ok(metadata) => {
            let execute_bits = metadata.permissions().mode() & 0o111;
            execute_bits != 0
        }
        Err(_) => false,
    }
}

/// Non-Unix targets: no permission check is performed; every path is accepted.
#[cfg(not(unix))]
fn is_executable_file(_path: &Path) -> bool {
    true
}
818
819fn check_from_open_error(err: &LeanWorkerError) -> LeanWorkerBootstrapCheck {
820    match err {
821        LeanWorkerError::WorkerChildUnresolved { tried } => LeanWorkerBootstrapCheck::error(
822            LeanWorkerBootstrapDiagnosticCode::WorkerChildUnresolved,
823            "worker child",
824            format!("could not resolve worker child; tried {}", format_paths(tried)),
825            "ship an app-owned worker child binary beside the app or configure LeanWorkerChild::env_override",
826        ),
827        LeanWorkerError::WorkerChildNotExecutable { path, reason } => LeanWorkerBootstrapCheck::error(
828            LeanWorkerBootstrapDiagnosticCode::WorkerChildNotExecutable,
829            path.display().to_string(),
830            reason.clone(),
831            "ship an app-owned worker child binary and ensure it is executable",
832        ),
833        LeanWorkerError::Bootstrap { code, message } => LeanWorkerBootstrapCheck::error(
834            *code,
835            code.as_str(),
836            message.clone(),
837            "fix the reported bootstrap input",
838        ),
839        LeanWorkerError::Handshake { message } => LeanWorkerBootstrapCheck::error(
840            LeanWorkerBootstrapDiagnosticCode::WorkerHandshakeFailed,
841            "worker handshake",
842            message.clone(),
843            "ensure the worker child calls lean_rs_worker::run_worker_child_stdio and matches this crate version",
844        ),
845        LeanWorkerError::Timeout {
846            operation: "startup", ..
847        } => LeanWorkerBootstrapCheck::error(
848            LeanWorkerBootstrapDiagnosticCode::WorkerHandshakeFailed,
849            "worker handshake",
850            err.to_string(),
851            "check that the worker child starts promptly and writes the lean-rs-worker handshake",
852        ),
853        LeanWorkerError::CapabilityMetadataMismatch { export, .. } => LeanWorkerBootstrapCheck::error(
854            LeanWorkerBootstrapDiagnosticCode::CapabilityMetadataMismatch,
855            export.clone(),
856            "capability metadata did not match the requested expectation",
857            "rebuild or select a capability whose metadata matches the caller expectation",
858        ),
859        other @ (LeanWorkerError::Spawn { .. }
860        | LeanWorkerError::CapabilityBuild { .. }
861        | LeanWorkerError::Setup { .. }
862        | LeanWorkerError::Protocol { .. }
863        | LeanWorkerError::Worker { .. }
864        | LeanWorkerError::ChildExited { .. }
865        | LeanWorkerError::ChildPanicOrAbort { .. }
866        | LeanWorkerError::Timeout { .. }
867        | LeanWorkerError::Cancelled { .. }
868        | LeanWorkerError::ProgressPanic { .. }
869        | LeanWorkerError::DataSinkPanic { .. }
870        | LeanWorkerError::DiagnosticSinkPanic { .. }
871        | LeanWorkerError::StreamExportFailed { .. }
872        | LeanWorkerError::StreamCallbackFailed { .. }
873        | LeanWorkerError::StreamRowMalformed { .. }
874        | LeanWorkerError::CapabilityMetadataMalformed { .. }
875        | LeanWorkerError::CapabilityDoctorMalformed { .. }
876        | LeanWorkerError::TypedCommandRequestEncode { .. }
877        | LeanWorkerError::TypedCommandResponseDecode { .. }
878        | LeanWorkerError::TypedCommandRowDecode { .. }
879        | LeanWorkerError::TypedCommandSummaryDecode { .. }
880        | LeanWorkerError::LeaseInvalidated { .. }
881        | LeanWorkerError::WorkerPoolExhausted { .. }
882        | LeanWorkerError::WorkerPoolMemoryBudgetExceeded { .. }
883        | LeanWorkerError::WorkerPoolQueueTimeout { .. }
884        | LeanWorkerError::UnsupportedRequest { .. }
885        | LeanWorkerError::Wait { .. }) => LeanWorkerBootstrapCheck::error(
886            LeanWorkerBootstrapDiagnosticCode::WorkerStartupFailed,
887            "worker bootstrap",
888            other.to_string(),
889            "run the bootstrap check in a deployment environment and rebuild the worker child or capability artifact",
890        ),
891    }
892}
893
/// Renders `paths` as a single `", "`-separated string for diagnostics.
///
/// An empty slice is rendered as the placeholder `<none>`.
fn format_paths(paths: &[PathBuf]) -> String {
    let Some((first, rest)) = paths.split_first() else {
        return "<none>".to_owned();
    };

    let mut rendered = first.display().to_string();
    for path in rest {
        rendered.push_str(", ");
        rendered.push_str(&path.display().to_string());
    }
    rendered
}
904
/// Caps `text` at 1,024 bytes for bootstrap diagnostics.
///
/// Text that already fits is returned unchanged. Longer text is cut at the
/// last UTF-8 character boundary at or before the limit and `"..."` is
/// appended, so the result is at most `LIMIT + 3` bytes long.
fn bound_bootstrap_text(mut text: String) -> String {
    const LIMIT: usize = 1_024;
    if text.len() <= LIMIT {
        return text;
    }
    // Walk the cut point backwards instead of popping characters off the end:
    // `is_char_boundary` is false for any index past the end of the string, so
    // shrinking the string while testing a fixed index never terminates when
    // the limit lands inside a multi-byte character. Index 0 is always a
    // boundary, so this loop cannot underflow.
    let mut cut = LIMIT;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    text.truncate(cut);
    text.push_str("...");
    text
}
917
/// Lists the locations where a worker executable named `executable_name`
/// may sit relative to the running binary.
///
/// Candidates are, in order, the directory containing the current executable
/// and that directory's parent. The list is empty when the current executable
/// path cannot be determined.
fn candidate_sibling_worker_paths(executable_name: &str) -> Vec<PathBuf> {
    let Ok(current_exe) = env::current_exe() else {
        return Vec::new();
    };

    // `ancestors()` yields the path itself first; skip it and keep the next
    // two levels (containing directory, then its parent) when they exist.
    current_exe
        .ancestors()
        .skip(1)
        .take(2)
        .map(|dir| dir.join(executable_name))
        .collect()
}
930
/// Appends the platform executable suffix (`env::consts::EXE_SUFFIX`) to
/// `executable_name` unless the suffix is empty or the name already ends
/// with it.
fn with_exe_suffix(mut executable_name: String) -> String {
    let suffix = env::consts::EXE_SUFFIX;
    let already_complete = suffix.is_empty() || executable_name.ends_with(suffix);
    if !already_complete {
        executable_name.push_str(suffix);
    }
    executable_name
}
937
938fn infer_lake_project_root_from_dylib(dylib_path: &Path) -> Result<PathBuf, LeanWorkerError> {
939    let lib_dir = dylib_path.parent();
940    let build_dir = lib_dir.and_then(Path::parent);
941    let lake_dir = build_dir.and_then(Path::parent);
942    let project_root = lake_dir.and_then(Path::parent);
943    match (lib_dir, build_dir, lake_dir, project_root) {
944        (Some(lib), Some(build), Some(lake), Some(root))
945            if lib.file_name().is_some_and(|name| name == "lib")
946                && build.file_name().is_some_and(|name| name == "build")
947                && lake.file_name().is_some_and(|name| name == ".lake") =>
948        {
949            Ok(root.to_path_buf())
950        }
951        _ => Err(LeanWorkerError::Setup {
952            message: format!(
953                "built capability dylib '{}' is not under a standard .lake/build/lib directory",
954                dylib_path.display()
955            ),
956        }),
957    }
958}
959
960fn try_build_workspace_worker_child(executable_name: &str, tried: &mut Vec<PathBuf>) -> Option<PathBuf> {
961    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
962    let workspace = manifest_dir.parent()?.parent()?;
963    if !workspace
964        .join("crates")
965        .join("lean-rs-worker")
966        .join("Cargo.toml")
967        .is_file()
968    {
969        return None;
970    }
971
972    let debug = workspace.join("target").join("debug").join(executable_name);
973    let release = workspace.join("target").join("release").join(executable_name);
974    tried.push(debug.clone());
975    tried.push(release.clone());
976    if debug.is_file() {
977        return Some(debug);
978    }
979    if release.is_file() {
980        return Some(release);
981    }
982
983    let cargo = env::var_os("CARGO").unwrap_or_else(|| "cargo".into());
984    let status = Command::new(cargo)
985        .current_dir(workspace)
986        .args(["build", "-p", "lean-rs-worker", "--bin", "lean-rs-worker-child"])
987        .status()
988        .ok()?;
989    if !status.success() {
990        return None;
991    }
992    debug.is_file().then_some(debug)
993}
994
/// Returns the distinct entries of `paths`, keeping the first occurrence of
/// each and preserving the original order.
fn dedup_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
    let mut distinct: Vec<PathBuf> = Vec::new();
    for candidate in paths {
        if distinct.contains(candidate) {
            continue;
        }
        distinct.push(candidate.clone());
    }
    distinct
}