lash-core 0.1.0-alpha.34

Sans-IO turn machine and runtime kernel for the lash agent runtime.
Documentation
use super::*;

impl LashRuntime {
    pub fn unregister_plugin_session(&self) -> Result<(), crate::PluginError> {
        if let Some(session) = self.session.as_ref() {
            session
                .plugins()
                .host()
                .unregister_session(&self.state.session_id)?;
        }
        Ok(())
    }

    pub(super) async fn from_host_state(
        policy: SessionPolicy,
        host: RuntimeHost,
        services: RuntimeServices,
        mut state: RuntimeSessionState,
    ) -> Result<Self, SessionError> {
        if state.session_id.is_empty() {
            state.session_id = uuid::Uuid::new_v4().to_string();
        }
        // Defaulted state (e.g. `RuntimeSessionState::default()` used
        // by fresh-session constructors) carries an empty policy.
        // Fill it in from the caller's policy so tests and hosts that
        // pass a real policy alongside default state don't trip the explicit
        // model-spec guard below.
        let state_policy_was_unconfigured = state.policy.recorded_provider_id().is_empty()
            && state.policy.model.id.trim().is_empty();
        if state_policy_was_unconfigured {
            state.policy = policy.clone();
        }
        state.ensure_agent_frame_initialized();
        let state_policy = state.policy.clone();
        if let Some(frame) = state.current_agent_frame_mut()
            && frame.assignment.policy.recorded_provider_id().is_empty()
            && frame.assignment.policy.model.id.trim().is_empty()
        {
            frame.assignment.policy = state_policy;
        }
        state.policy = state.effective_policy().clone();
        state.protocol_turn_options = state.effective_protocol_turn_options().clone();
        normalize_session_graph(&mut state);
        let policy = state.effective_policy().clone();
        if policy.model.id.trim().is_empty() {
            return Err(SessionError::Protocol(
                "session policy missing model spec; hosts must supply explicit model metadata"
                    .to_string(),
            ));
        }
        let mut host = host;
        // When a persistent backend is wired in, wrap the attachment
        // store so every `put` records a write-ahead intent row first.
        // Crashes between put and the next turn commit then surface as
        // uncommitted manifest rows that GC can reconcile. Ephemeral
        // (no-store) runtimes use the inner store directly — there's
        // nothing to reconcile against.
        if let Some(store) = services.store.clone() {
            let manifest: Arc<dyn crate::AttachmentManifest> =
                Arc::new(crate::attachments::PersistenceManifestAdapter(store));
            let scoped: Arc<dyn crate::AttachmentStore> =
                Arc::new(crate::SessionScopedAttachmentStore::new(
                    Arc::clone(&host.core.durability.attachment_store),
                    manifest,
                    state.session_id.clone(),
                ));
            host.core.durability.attachment_store = scoped;
        }
        let services =
            services.with_attachment_store(Arc::clone(&host.core.durability.attachment_store));
        let services = services.with_lashlang_artifact_store(Arc::clone(
            &host.core.durability.lashlang_artifact_store,
        ));
        let mut session = Session::new(services.clone(), &state.session_id).await?;
        if let Some(tool_state) = state.tool_state_snapshot.clone() {
            // Cold rebuild restores the exact persisted tool surface, adopting
            // the snapshot's generation. `apply_state` (a delta-apply that
            // requires `snapshot.generation == base` and bumps) would reject a
            // session whose surface reached generation ≥ 2 onto a fresh base-1
            // registry — the worker-rebuild / restart divergence. `restore_state`
            // adopts the snapshot's generation wholesale, so any generation
            // rebuilds.
            session
                .plugins()
                .tool_registry()
                .restore_state(tool_state)
                .map_err(|err| SessionError::Protocol(err.to_string()))?;
        }
        session.refresh_tool_surface().await?;
        if let Some(snapshot) = state.plugin_snapshot.clone() {
            session
                .plugins()
                .restore(&snapshot)
                .map_err(|err| SessionError::Protocol(err.to_string()))?;
        }
        let protocol_session = Arc::clone(session.plugins().protocol_session());
        let session_id = state.session_id.clone();
        protocol_session
            .restore_session(
                crate::plugin::ProtocolSessionContext::new(&mut session, &session_id),
                &state,
            )
            .await?;
        state.discard_runtime_snapshots();
        session
            .plugins()
            .emit_runtime_event(crate::PluginLifecycleEvent::SessionRestored(
                crate::SessionReadView::from_persisted_state(&state),
            ))
            .await;
        let protocol_turn_options = state.protocol_turn_options.clone();
        Ok(Self {
            session: Some(session),
            policy,
            host,
            services,
            state,
            runtime_scope_id: Arc::<str>::from(uuid::Uuid::new_v4().to_string()),
            managed_sessions: Arc::new(Mutex::new(HashMap::new())),
            managed_turns: Arc::new(Mutex::new(HashMap::new())),
            protocol_turn_options,
            shared_token_ledger: Arc::new(std::sync::Mutex::new(Vec::new())),
            process_sync_needed: Arc::new(AtomicBool::new(false)),
            turn_phase_probe: None,
            residency: Residency::default(),
        })
    }

    /// Build a runtime for an embedded host with no background worker support.
    pub async fn from_embedded_state(
        policy: SessionPolicy,
        host: EmbeddedRuntimeHost,
        services: RuntimeServices,
        state: RuntimeSessionState,
    ) -> Result<Self, SessionError> {
        Self::from_host_state(policy, host.into(), services, state).await
    }

    /// Build a runtime for a host that supports background plugin work.
    pub async fn from_background_state(
        policy: SessionPolicy,
        host: ProcessRuntimeHost,
        services: RuntimeServices,
        state: RuntimeSessionState,
    ) -> Result<Self, SessionError> {
        Self::from_host_state(policy, host.into(), services, state).await
    }

    /// Build a runtime for an embedded host with persistent store support.
    pub async fn from_persistent_embedded_state(
        policy: SessionPolicy,
        host: EmbeddedRuntimeHost,
        services: PersistentRuntimeServices,
        state: RuntimeSessionState,
    ) -> Result<Self, SessionError> {
        Self::from_host_state(policy, host.into(), services.into_runtime_services(), state).await
    }

    /// Build a runtime for a background-capable host with persistent store support.
    pub async fn from_persistent_background_state(
        policy: SessionPolicy,
        host: ProcessRuntimeHost,
        services: PersistentRuntimeServices,
        state: RuntimeSessionState,
    ) -> Result<Self, SessionError> {
        Self::from_host_state(policy, host.into(), services.into_runtime_services(), state).await
    }

    /// Assemble a runtime from already-resolved parts: the single place that maps
    /// `(store, process_registry)` to the right host/services constructor, applies
    /// residency, and stamps it onto the runtime.
    ///
    /// Every construction path — the live open (`from_environment`), the worker
    /// rebuild (`EmbeddedRuntimeBuilder::build`), and child-session
    /// materialization — routes through here so the store/registry wiring and
    /// residency cannot drift between them. That drift previously shipped: the
    /// worker rebuild silently kept the full graph and skipped the persisted
    /// tool-surface restore that the live path applied.
    pub(crate) async fn assemble_runtime(
        policy: SessionPolicy,
        embedded_host: EmbeddedRuntimeHost,
        plugin_session: Arc<crate::PluginSession>,
        store: Option<Arc<dyn crate::store::RuntimePersistence>>,
        process_registry: Option<Arc<dyn ProcessRegistry>>,
        mut state: RuntimeSessionState,
        residency: Residency,
    ) -> Result<Self, SessionError> {
        // ActivePathOnly without a store is a data-loss footgun: trimming drops
        // orphans from RAM with nowhere to reload them from.
        if matches!(residency, Residency::ActivePathOnly) && store.is_none() {
            return Err(SessionError::Protocol(
                "Residency::ActivePathOnly requires a persistent store — \
                 without one, trimmed orphans are irrecoverable"
                    .to_string(),
            ));
        }
        // Heal FIRST (against the full resident set), then trim to the residency.
        // `from_host_state` normalizes again, which is safe on a trimmed graph.
        normalize_session_graph(&mut state);
        apply_residency_on_load(&mut state, residency);
        let mut runtime = match (store, process_registry) {
            (Some(store), Some(registry)) => {
                let host = ProcessRuntimeHost::new(embedded_host, registry);
                let services = PersistentRuntimeServices::new(plugin_session, store);
                Self::from_persistent_background_state(policy, host, services, state).await?
            }
            (Some(store), None) => {
                let services = PersistentRuntimeServices::new(plugin_session, store);
                Self::from_persistent_embedded_state(policy, embedded_host, services, state).await?
            }
            (None, Some(registry)) => {
                let host = ProcessRuntimeHost::new(embedded_host, registry);
                let services = RuntimeServices::new(plugin_session);
                Self::from_background_state(policy, host, services, state).await?
            }
            (None, None) => {
                let services = RuntimeServices::new(plugin_session);
                Self::from_embedded_state(policy, embedded_host, services, state).await?
            }
        };
        runtime.residency = residency;
        Ok(runtime)
    }

    /// Embedder-preferred constructor: build a `LashRuntime` from a
    /// shared `RuntimeEnvironment`.
    ///
    /// Everything expensive (plugin factories, HTTP client pool, prompt
    /// template, path resolver) lives on the environment and is
    /// reused across every runtime the embedder builds. This call is
    /// O(plugin-session-registration + state-hydration), not
    /// O(full-infrastructure-init).
    ///
    /// * `env` — the shared environment. `env.plugin_host` must be set.
    /// * `policy` — per-session policy (model, provider, autonomy, turn limits).
    /// * `state` — persisted session state (empty for a fresh session).
    /// * `store` — per-session store. `None` builds an embedded runtime
    ///   with no persistence; `Some` builds a persistent
    ///   background-capable runtime.
    pub async fn from_environment(
        env: &RuntimeEnvironment,
        policy: SessionPolicy,
        state: RuntimeSessionState,
        store: Option<Arc<dyn crate::store::RuntimePersistence>>,
    ) -> Result<Self, SessionError> {
        let plugin_host = env.plugin_host.as_ref().ok_or_else(|| {
            SessionError::Protocol(
                "RuntimeEnvironment.plugin_host is required for from_environment".to_string(),
            )
        })?;
        let plugin_host = plugin_host.as_ref().clone().with_lashlang_abilities(
            super::builder::lashlang_abilities_for_process_registry(
                plugin_host.lashlang_abilities(),
                env.process_registry.is_some(),
            ),
        );
        let plugin_session = plugin_host
            .build_session(state.session_id.as_str(), state.plugin_snapshot.as_ref())
            .map_err(|err| SessionError::Protocol(err.to_string()))?;
        let mut embedded = EmbeddedRuntimeHost::new(env.core.clone());
        if let Some(factory) = env.session_store_factory.as_ref() {
            embedded = embedded.with_session_store_factory(Arc::clone(factory));
        }
        let mut runtime = Self::assemble_runtime(
            policy,
            embedded,
            plugin_session,
            store,
            env.process_registry.as_ref().cloned(),
            state,
            env.residency,
        )
        .await?;
        // Thread the host's process-work poke onto this session's host so the
        // process control seam can wake the runner after a successful start.
        runtime.host.process_work_poke = env.process_work_poke.clone();
        runtime.host.queued_work_poke = env.queued_work_poke.clone();
        Ok(runtime)
    }

    /// Persist any dirty state and drop the runtime, returning a lightweight
    /// handle the embedder can cache and resume later via
    /// [`LashRuntime::resume`]. This is the webserver-embedder parking
    /// primitive: the handle holds only the session id, policy, and store
    /// reference — no graph nodes, no plugin session, no HTTP client.
    pub async fn park(mut self) -> Result<ParkedSession, SessionError> {
        let store = self.services.store.clone().ok_or_else(|| {
            SessionError::Protocol(
                "park() requires a persistent runtime (store is not set)".to_string(),
            )
        })?;
        let session_id = self.state.session_id.clone();
        let policy = self.policy.clone();
        // Flush any dirty resident state to the store before dropping.
        let commit = crate::store::RuntimeCommit::persisted_state(&self.state, &[]);
        let result = store.commit_runtime_state(commit).await.map_err(|err| {
            SessionError::Protocol(format!("failed to persist runtime state: {err}"))
        })?;
        self.state.apply_persisted_commit_result(result);
        // Drain pending tombstones if any. Under KeepHistory this is a
        // no-op (tombstones never get added). Under DropOrphans,
        // Phase-9's not-yet-wired rewrite path would have populated the
        // set — wired fully in Phase 10's vacuum() design.
        Ok(ParkedSession {
            session_id,
            store,
            policy,
        })
    }

    /// Resume a previously parked session against a shared environment.
    /// Loads only the active-path graph when
    /// `env.residency == ActivePathOnly`; under `KeepAll`
    /// loads the full graph (current behavior).
    pub async fn resume(
        parked: ParkedSession,
        env: &RuntimeEnvironment,
    ) -> Result<Self, SessionError> {
        // Under ActivePathOnly, skip the full-graph load: fetch head
        // metadata + the active-path chain only. Durable impls can
        // ActivePathOnly is an exact store capability. Stores that do
        // not support it must return UnsupportedReadScope; resume does
        // not fall back to a full graph load.
        let loaded = match env.residency {
            Residency::KeepAll => {
                crate::store::load_persisted_session_state(parked.store.as_ref()).await
            }
            Residency::ActivePathOnly => {
                crate::store::load_persisted_session_state_active_path(parked.store.as_ref(), None)
                    .await
            }
        }
        .map_err(|err| SessionError::Protocol(format!("failed to load runtime state: {err}")))?;
        let state = loaded.unwrap_or_else(|| RuntimeSessionState {
            session_id: parked.session_id.clone(),
            policy: parked.policy.clone(),
            ..RuntimeSessionState::default()
        });
        Self::from_environment(env, parked.policy, state, Some(parked.store)).await
    }

    /// Opt-in async read for historic (non-active-path) nodes under
    /// `Residency::ActivePathOnly`. Plugins that walk the full graph
    /// call this instead of `session_graph().find_node()` so missing
    /// nodes surface as `Ok(None)` rather than silently missing.
    pub async fn get_historic_node(
        &self,
        node_id: &str,
    ) -> Result<Option<crate::SessionNodeRecord>, SessionError> {
        if let Some(node) = self.state.session_graph.find_node(node_id) {
            return Ok(Some(node.clone()));
        }
        let store = self.services.store.clone().ok_or_else(|| {
            SessionError::Protocol("get_historic_node() requires a persistent runtime".to_string())
        })?;
        store
            .load_node(node_id)
            .await
            .map_err(|err| SessionError::Protocol(format!("failed to load historic node: {err}")))
    }

    /// Store-resident node IDs that are NOT reachable from the current
    /// leaf — i.e. orphans eligible for tombstoning. lash owns RAM; the
    /// host owns disk lifecycle, so this is a primitive the host calls
    /// on its own schedule (e.g. every N turns, or off-peak).
    ///
    /// Typical autonomous-agent loop:
    ///
    /// ```ignore
    /// let orphans = runtime.orphaned_node_ids().await?;
    /// if !orphans.is_empty() {
    ///     store.tombstone_nodes(&orphans).await;
    /// }
    /// // And less often:
    /// store.vacuum().await;
    /// ```
    pub async fn orphaned_node_ids(&self) -> Result<Vec<String>, SessionError> {
        let store = self.services.store.clone().ok_or_else(|| {
            SessionError::Protocol("orphaned_node_ids() requires a persistent runtime".to_string())
        })?;
        let Some(read) = store
            .load_session(crate::store::SessionReadScope::FullGraph)
            .await
            .map_err(|err| SessionError::Protocol(format!("failed to load full graph: {err}")))?
        else {
            return Ok(Vec::new());
        };
        let active: std::collections::HashSet<&str> = read
            .graph
            .active_path_nodes()
            .iter()
            .map(|node| node.node_id.as_str())
            .collect();
        Ok(read
            .graph
            .nodes
            .iter()
            .filter(|node| !active.contains(node.node_id.as_str()))
            .map(|node| node.node_id.clone())
            .collect())
    }
}