use std::collections::HashSet;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use std::time::SystemTime;
use fchat::{
ChatError, ChatEvent, ChatPolicy, ChatService, ChatSession, ChatTurnRequest, ChatTurnResult,
};
use fcommon::{BoxFuture, SessionId};
use fmemory::{
FeatureRecord, MemoryBackend, MemoryConversationStore, MemoryError, ProgressEntry,
RunCheckpoint, RunStatus, SessionManifest,
};
use fprovider::ModelProvider;
use ftooling::ToolRuntime;
use futures_util::StreamExt;
/// Coarse classification of a [`HarnessError`], letting callers branch on
/// the failure cause without parsing the message text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HarnessErrorKind {
/// The caller supplied an invalid or inconsistent request/policy.
InvalidRequest,
/// A memory-backend (persistence) operation failed.
Memory,
/// The chat service returned an error or an incomplete stream.
Chat,
/// Outcome validation could not proceed (e.g. no feature selected).
Validation,
/// The pre-run health check failed.
HealthCheck,
/// A required collaborator (provider, chat service, manifest) is missing.
NotReady,
}
/// Error type for all harness operations: a [`HarnessErrorKind`] plus a
/// human-readable message. Displayed as `Kind: message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HarnessError {
// Machine-checkable category of the failure.
pub kind: HarnessErrorKind,
// Human-readable detail; not intended for programmatic matching.
pub message: String,
}
impl HarnessError {
    /// Builds an error from an explicit kind and message.
    pub fn new(kind: HarnessErrorKind, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { kind, message }
    }
    /// Shorthand for an `InvalidRequest` error.
    pub fn invalid_request(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::InvalidRequest, message)
    }
    /// Shorthand for a `Memory` (persistence) error.
    pub fn memory(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Memory, message)
    }
    /// Shorthand for a `Chat` error.
    pub fn chat(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Chat, message)
    }
    /// Shorthand for a `Validation` error.
    pub fn validation(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Validation, message)
    }
    /// Shorthand for a `HealthCheck` error.
    pub fn health_check(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::HealthCheck, message)
    }
    /// Shorthand for a `NotReady` error.
    pub fn not_ready(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::NotReady, message)
    }
}
impl Display for HarnessError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}: {}", self.kind, self.message)
}
}
impl Error for HarnessError {}
impl From<MemoryError> for HarnessError {
fn from(value: MemoryError) -> Self {
HarnessError::memory(value.message)
}
}
impl From<ChatError> for HarnessError {
fn from(value: ChatError) -> Self {
HarnessError::chat(value.to_string())
}
}
/// Input for the initializer phase; refine defaults via the `with_*` builders.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InitializerRequest {
pub session_id: SessionId,
pub run_id: String,
// Branch recorded in the session manifest; defaults to "feature/initializer".
pub active_branch: String,
pub current_objective: String,
// Optional startup script; `Harness::DEFAULT_INIT_SCRIPT` is used when None.
pub init_script: Option<String>,
// Seed features; when empty the harness generates a starter list.
pub feature_list: Vec<FeatureRecord>,
pub progress_summary: String,
}
impl InitializerRequest {
    /// Creates a request with scaffold defaults for branch, script, feature
    /// list, and progress summary.
    pub fn new(
        session_id: impl Into<SessionId>,
        run_id: impl Into<String>,
        current_objective: impl Into<String>,
    ) -> Self {
        Self {
            session_id: session_id.into(),
            run_id: run_id.into(),
            active_branch: String::from("feature/initializer"),
            current_objective: current_objective.into(),
            init_script: None,
            feature_list: Vec::new(),
            progress_summary: String::from("Initializer scaffold created"),
        }
    }
    /// Overrides the branch recorded in the manifest.
    pub fn with_active_branch(self, active_branch: impl Into<String>) -> Self {
        Self {
            active_branch: active_branch.into(),
            ..self
        }
    }
    /// Supplies an explicit init script instead of the default.
    pub fn with_init_script(self, init_script: impl Into<String>) -> Self {
        Self {
            init_script: Some(init_script.into()),
            ..self
        }
    }
    /// Supplies an explicit seed feature list.
    pub fn with_feature_list(self, feature_list: Vec<FeatureRecord>) -> Self {
        Self {
            feature_list,
            ..self
        }
    }
    /// Overrides the first progress-log entry text.
    pub fn with_progress_summary(self, progress_summary: impl Into<String>) -> Self {
        Self {
            progress_summary: progress_summary.into(),
            ..self
        }
    }
}
/// Summary of the bootstrap state persisted (or found) by the initializer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InitializerResult {
pub session_id: SessionId,
// True if this run created the state; false if it already existed.
pub created: bool,
pub schema_version: u32,
pub harness_version: String,
// Number of features in the persisted feature list.
pub feature_count: usize,
}
/// Input for a single coding iteration against an initialized session.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodingRunRequest {
pub session: ChatSession,
pub run_id: String,
// When true, chat turns use the streaming path.
pub stream: bool,
// Replaces the generated feature prompt when set.
pub prompt_override: Option<String>,
}
impl CodingRunRequest {
    /// Creates a non-streaming request with no prompt override.
    pub fn new(session: ChatSession, run_id: impl Into<String>) -> Self {
        Self {
            session,
            run_id: run_id.into(),
            stream: false,
            prompt_override: None,
        }
    }
    /// Switches the run to the streaming chat path.
    pub fn enable_streaming(self) -> Self {
        Self {
            stream: true,
            ..self
        }
    }
    /// Replaces the generated feature prompt with an explicit one.
    pub fn with_prompt_override(self, prompt_override: impl Into<String>) -> Self {
        Self {
            prompt_override: Some(prompt_override.into()),
            ..self
        }
    }
}
/// Outcome of one coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodingRunResult {
pub session_id: SessionId,
// Feature worked on this run; None when no work was attempted.
pub selected_feature_id: Option<String>,
// True when the validator accepted the turn result.
pub validated: bool,
// True when every feature in the list has passes=true.
pub no_pending_features: bool,
pub used_stream: bool,
// Final assistant text, when a turn completed.
pub assistant_message: Option<String>,
}
/// Which phase `Harness::run` will execute, based on session initialization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HarnessPhase {
/// Session has no bootstrap state yet; run the initializer.
Initializer,
/// Session is initialized; run a coding iteration.
Coding,
}
/// Result of `Harness::run`, tagged by the phase that actually executed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RuntimeRunOutcome {
Initializer(InitializerResult),
Coding(CodingRunResult),
}
/// Phase-agnostic request for `Harness::run`; carries the union of fields
/// needed by either the initializer or a coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuntimeRunRequest {
pub session: ChatSession,
pub run_id: String,
pub current_objective: String,
// Streaming flag; used only by the coding phase.
pub stream: bool,
// Prompt override; used only by the coding phase.
pub prompt_override: Option<String>,
// Init script; used only by the initializer phase.
pub init_script: Option<String>,
// Seed feature list; used only by the initializer phase.
pub feature_list: Vec<FeatureRecord>,
pub active_branch: String,
pub progress_summary: Option<String>,
}
impl RuntimeRunRequest {
    /// Creates a request with streaming disabled and scaffold defaults for
    /// every optional field.
    pub fn new(
        session: ChatSession,
        run_id: impl Into<String>,
        current_objective: impl Into<String>,
    ) -> Self {
        Self {
            session,
            run_id: run_id.into(),
            current_objective: current_objective.into(),
            stream: false,
            prompt_override: None,
            init_script: None,
            feature_list: Vec::new(),
            active_branch: String::from("feature/initializer"),
            progress_summary: None,
        }
    }
    /// Switches a coding run to the streaming chat path.
    pub fn enable_streaming(self) -> Self {
        Self {
            stream: true,
            ..self
        }
    }
    /// Replaces the generated feature prompt (coding phase only).
    pub fn with_prompt_override(self, prompt_override: impl Into<String>) -> Self {
        Self {
            prompt_override: Some(prompt_override.into()),
            ..self
        }
    }
    /// Supplies an explicit init script (initializer phase only).
    pub fn with_init_script(self, init_script: impl Into<String>) -> Self {
        Self {
            init_script: Some(init_script.into()),
            ..self
        }
    }
    /// Supplies a seed feature list (initializer phase only).
    pub fn with_feature_list(self, feature_list: Vec<FeatureRecord>) -> Self {
        Self {
            feature_list,
            ..self
        }
    }
    /// Overrides the branch recorded in the manifest.
    pub fn with_active_branch(self, active_branch: impl Into<String>) -> Self {
        Self {
            active_branch: active_branch.into(),
            ..self
        }
    }
    /// Overrides the first progress-log entry text.
    pub fn with_progress_summary(self, progress_summary: impl Into<String>) -> Self {
        Self {
            progress_summary: Some(progress_summary.into()),
            ..self
        }
    }
}
/// Controls which failure classes abort a coding iteration immediately
/// instead of consuming the retry budget.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailFastPolicy {
// Abort when the pre-run health check fails.
pub on_health_check_error: bool,
// Abort on a chat/provider error instead of retrying.
pub on_chat_error: bool,
// Abort on a validation failure instead of retrying.
pub on_validation_failure: bool,
}
impl Default for FailFastPolicy {
fn default() -> Self {
Self {
on_health_check_error: true,
on_chat_error: false,
on_validation_failure: true,
}
}
}
/// Budget and fail-fast configuration for a single coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunPolicy {
// Maximum chat turns per run; must be >= 1 (see `validate`).
pub max_turns_per_run: usize,
// Must be exactly 1 for strict incremental runs (see `validate`).
pub max_features_per_run: usize,
// Extra attempts allowed after a retryable failure.
pub retry_budget: usize,
pub fail_fast: FailFastPolicy,
}
impl Default for RunPolicy {
fn default() -> Self {
Self {
max_turns_per_run: 1,
max_features_per_run: 1,
retry_budget: 0,
fail_fast: FailFastPolicy::default(),
}
}
}
impl RunPolicy {
    /// Checks the policy invariants: at least one turn per run, and exactly
    /// one feature per run (strict incremental mode).
    ///
    /// # Errors
    /// Returns an `InvalidRequest` error when either invariant is violated.
    pub fn validate(&self) -> Result<(), HarnessError> {
        match (self.max_turns_per_run, self.max_features_per_run) {
            (0, _) => Err(HarnessError::invalid_request(
                "run policy requires max_turns_per_run >= 1",
            )),
            (_, features) if features != 1 => Err(HarnessError::invalid_request(
                "run policy requires max_features_per_run = 1 for strict incremental runs",
            )),
            _ => Ok(()),
        }
    }
}
/// Pre-run health probe executed before each coding iteration with the
/// session's init script text.
pub trait HealthChecker: Send + Sync {
fn run<'a>(
&'a self,
session_id: &'a SessionId,
init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>>;
}
/// Default health checker that always reports healthy without doing work.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopHealthChecker;
impl HealthChecker for NoopHealthChecker {
fn run<'a>(
&'a self,
_session_id: &'a SessionId,
_init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>> {
// Unconditionally healthy.
Box::pin(async { Ok(()) })
}
}
/// Judges whether a completed chat turn actually satisfied the selected
/// feature; `Ok(true)` marks the feature as passing.
pub trait OutcomeValidator: Send + Sync {
fn validate<'a>(
&'a self,
feature: &'a FeatureRecord,
result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>>;
}
/// Default validator that accepts every turn result unconditionally.
#[derive(Debug, Default, Clone, Copy)]
pub struct AcceptAllValidator;
impl OutcomeValidator for AcceptAllValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
// Always reports success.
Box::pin(async { Ok(true) })
}
}
/// Chooses which feature a coding iteration should work on, or `None` when
/// the selector finds no work.
pub trait FeatureSelector: Send + Sync {
fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord>;
}
/// Default selector: picks the first feature whose `passes` flag is false.
#[derive(Debug, Default, Clone, Copy)]
pub struct FirstPendingFeatureSelector;
impl FeatureSelector for FirstPendingFeatureSelector {
    /// Returns a clone of the first not-yet-passing record, in list order.
    fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
        for feature in feature_list {
            if !feature.passes {
                return Some(feature.clone());
            }
        }
        None
    }
}
/// Builder that wires a `ChatService` (provider + store + optional tool
/// runtime) into a [`Harness`]; see [`HarnessBuilder::build`].
pub struct HarnessBuilder {
memory: Arc<dyn MemoryBackend>,
// Required at build time; `build` fails without it.
provider: Option<Arc<dyn ModelProvider>>,
tool_runtime: Option<Arc<dyn ToolRuntime>>,
chat_policy: ChatPolicy,
health_checker: Arc<dyn HealthChecker>,
validator: Arc<dyn OutcomeValidator>,
feature_selector: Arc<dyn FeatureSelector>,
run_policy: RunPolicy,
schema_version: u32,
harness_version: String,
}
impl HarnessBuilder {
/// Starts a builder with default collaborators and no provider/tool runtime.
pub fn new(memory: Arc<dyn MemoryBackend>) -> Self {
Self {
memory,
provider: None,
tool_runtime: None,
chat_policy: ChatPolicy::default(),
health_checker: Arc::new(NoopHealthChecker),
validator: Arc::new(AcceptAllValidator),
feature_selector: Arc::new(FirstPendingFeatureSelector),
run_policy: RunPolicy::default(),
schema_version: SessionManifest::DEFAULT_SCHEMA_VERSION,
harness_version: SessionManifest::DEFAULT_HARNESS_VERSION.to_string(),
}
}
/// Sets the model provider (required by `build`).
pub fn provider(mut self, provider: Arc<dyn ModelProvider>) -> Self {
self.provider = Some(provider);
self
}
/// Sets an optional tool runtime forwarded to the chat service.
pub fn tool_runtime(mut self, tool_runtime: Arc<dyn ToolRuntime>) -> Self {
self.tool_runtime = Some(tool_runtime);
self
}
pub fn chat_policy(mut self, chat_policy: ChatPolicy) -> Self {
self.chat_policy = chat_policy;
self
}
pub fn health_checker(mut self, health_checker: Arc<dyn HealthChecker>) -> Self {
self.health_checker = health_checker;
self
}
pub fn validator(mut self, validator: Arc<dyn OutcomeValidator>) -> Self {
self.validator = validator;
self
}
pub fn feature_selector(mut self, feature_selector: Arc<dyn FeatureSelector>) -> Self {
self.feature_selector = feature_selector;
self
}
// NOTE(review): unlike `Harness::with_run_policy`, this setter does not
// validate; validation happens in `build`.
pub fn run_policy(mut self, run_policy: RunPolicy) -> Self {
self.run_policy = run_policy;
self
}
pub fn schema_version(mut self, schema_version: u32) -> Self {
self.schema_version = schema_version;
self
}
pub fn harness_version(mut self, harness_version: impl Into<String>) -> Self {
self.harness_version = harness_version.into();
self
}
/// Validates the run policy, then assembles a `ChatService` backed by a
/// `MemoryConversationStore` over the same memory backend.
///
/// # Errors
/// `InvalidRequest` when the run policy is invalid; `NotReady` when no
/// provider was supplied.
pub fn build(self) -> Result<Harness, HarnessError> {
self.run_policy.validate()?;
let provider = self
.provider
.ok_or_else(|| HarnessError::not_ready("provider is required to build chat runtime"))?;
// Conversation store shares the harness's memory backend.
let store = Arc::new(MemoryConversationStore::new(self.memory.clone()));
let mut chat_builder = ChatService::builder(provider)
.store(store)
.policy(self.chat_policy);
if let Some(tool_runtime) = self.tool_runtime {
chat_builder = chat_builder.tool_runtime(tool_runtime);
}
let chat = Arc::new(chat_builder.build());
Ok(Harness {
memory: self.memory,
chat: Some(chat),
health_checker: self.health_checker,
validator: self.validator,
feature_selector: self.feature_selector,
run_policy: self.run_policy,
schema_version: self.schema_version,
harness_version: self.harness_version,
})
}
}
/// Orchestrates the two-phase loop: a one-time initializer that persists
/// bootstrap state, then repeated single-feature coding iterations.
#[derive(Clone)]
pub struct Harness {
memory: Arc<dyn MemoryBackend>,
// None until configured via builder or `with_chat`; coding runs need it.
chat: Option<Arc<ChatService>>,
health_checker: Arc<dyn HealthChecker>,
validator: Arc<dyn OutcomeValidator>,
feature_selector: Arc<dyn FeatureSelector>,
run_policy: RunPolicy,
schema_version: u32,
harness_version: String,
}
impl Harness {
/// Fallback init script used when neither the request nor the session
/// manifest provides one.
pub const DEFAULT_INIT_SCRIPT: &'static str =
"#!/usr/bin/env bash\nset -e\npwd\ngit log --oneline -20\n";
/// Creates a harness with default collaborators and no chat service;
/// coding iterations require `with_chat` (or use the builder).
pub fn new(memory: Arc<dyn MemoryBackend>) -> Self {
Self {
memory,
chat: None,
health_checker: Arc::new(NoopHealthChecker),
validator: Arc::new(AcceptAllValidator),
feature_selector: Arc::new(FirstPendingFeatureSelector),
run_policy: RunPolicy::default(),
schema_version: SessionManifest::DEFAULT_SCHEMA_VERSION,
harness_version: SessionManifest::DEFAULT_HARNESS_VERSION.to_string(),
}
}
/// Convenience shorthand for [`HarnessBuilder::new`].
pub fn builder(memory: Arc<dyn MemoryBackend>) -> HarnessBuilder {
HarnessBuilder::new(memory)
}
pub fn with_chat(mut self, chat: Arc<ChatService>) -> Self {
self.chat = Some(chat);
self
}
pub fn with_health_checker(mut self, health_checker: Arc<dyn HealthChecker>) -> Self {
self.health_checker = health_checker;
self
}
pub fn with_validator(mut self, validator: Arc<dyn OutcomeValidator>) -> Self {
self.validator = validator;
self
}
pub fn with_feature_selector(mut self, feature_selector: Arc<dyn FeatureSelector>) -> Self {
self.feature_selector = feature_selector;
self
}
pub fn with_run_policy(mut self, run_policy: RunPolicy) -> Result<Self, HarnessError> {
run_policy.validate()?;
self.run_policy = run_policy;
Ok(self)
}
pub fn with_schema_version(mut self, schema_version: u32) -> Self {
self.schema_version = schema_version;
self
}
pub fn with_harness_version(mut self, harness_version: impl Into<String>) -> Self {
self.harness_version = harness_version.into();
self
}
/// Decides the phase for a session: `Coding` once the memory backend
/// reports it initialized, `Initializer` otherwise.
pub async fn select_phase(&self, session_id: &SessionId) -> Result<HarnessPhase, HarnessError> {
    let initialized = self.memory.is_initialized(session_id).await?;
    let phase = if initialized {
        HarnessPhase::Coding
    } else {
        HarnessPhase::Initializer
    };
    Ok(phase)
}
/// Entry point that dispatches on [`Harness::select_phase`]: translates the
/// phase-agnostic request into either an initializer request or a coding
/// request and runs the matching phase.
pub async fn run(&self, request: RuntimeRunRequest) -> Result<RuntimeRunOutcome, HarnessError> {
let phase = self.select_phase(&request.session.id).await?;
match phase {
HarnessPhase::Initializer => {
let mut initializer = InitializerRequest::new(
request.session.id.clone(),
request.run_id.clone(),
request.current_objective,
)
.with_active_branch(request.active_branch);
if let Some(init_script) = request.init_script {
initializer = initializer.with_init_script(init_script);
}
// Empty list keeps the InitializerRequest default so the starter
// list fallback in run_initializer still applies.
if !request.feature_list.is_empty() {
initializer = initializer.with_feature_list(request.feature_list);
}
if let Some(progress_summary) = request.progress_summary {
initializer = initializer.with_progress_summary(progress_summary);
}
self.run_initializer(initializer)
.await
.map(RuntimeRunOutcome::Initializer)
}
HarnessPhase::Coding => {
// Coding phase ignores init_script/feature_list/branch/summary.
let mut coding = CodingRunRequest::new(request.session, request.run_id);
if request.stream {
coding = coding.enable_streaming();
}
if let Some(prompt_override) = request.prompt_override {
coding = coding.with_prompt_override(prompt_override);
}
self.run_coding_iteration(coding)
.await
.map(RuntimeRunOutcome::Coding)
}
}
}
/// Runs the initializer phase: validates the request, assembles scaffold
/// artifacts (manifest, feature list, first progress entry, started run
/// checkpoint), persists them only when the session has no bootstrap state
/// yet, and returns a summary of what is actually stored.
///
/// # Errors
/// `InvalidRequest` for an empty objective or an invalid feature list;
/// `Memory` when persistence fails or the manifest is missing afterwards.
pub async fn run_initializer(
    &self,
    request: InitializerRequest,
) -> Result<InitializerResult, HarnessError> {
    let InitializerRequest {
        session_id,
        run_id,
        active_branch,
        current_objective,
        init_script,
        feature_list,
        progress_summary,
    } = request;
    if current_objective.trim().is_empty() {
        return Err(HarnessError::invalid_request(
            "current_objective must not be empty",
        ));
    }
    // Fall back to the starter feature list when the caller supplied none.
    // BUGFIX: the argument was corrupted to `¤t_objective` (an HTML
    // entity mangling of `&current_objective`), which does not compile.
    let feature_list = if feature_list.is_empty() {
        self.starter_feature_list(&current_objective)
    } else {
        feature_list
    };
    validate_feature_list(&feature_list)?;
    let progress_summary = if progress_summary.trim().is_empty() {
        format!("Initializer scaffold created for objective: {current_objective}")
    } else {
        progress_summary
    };
    let init_script = init_script.unwrap_or_else(|| Self::DEFAULT_INIT_SCRIPT.to_string());
    let mut manifest =
        SessionManifest::new(session_id.clone(), active_branch, current_objective)
            .with_schema_version(self.schema_version)
            .with_harness_version(self.harness_version.clone());
    manifest.init_script = Some(init_script);
    // Idempotent write: only persists when no bootstrap state exists yet.
    let created = self
        .memory
        .initialize_session_if_missing(
            &session_id,
            manifest,
            feature_list,
            Some(ProgressEntry::new(run_id.clone(), progress_summary)),
            Some(RunCheckpoint::started(run_id)),
        )
        .await?;
    // Re-read persisted state so the result reflects storage, not the input
    // (matters when the session already existed and nothing was written).
    let bootstrap = self.memory.load_bootstrap_state(&session_id).await?;
    let manifest = bootstrap
        .manifest
        .ok_or_else(|| HarnessError::memory("manifest missing after initializer run"))?;
    Ok(InitializerResult {
        session_id: manifest.session_id,
        created,
        schema_version: manifest.schema_version,
        harness_version: manifest.harness_version,
        feature_count: bootstrap.feature_list.len(),
    })
}
/// Runs one coding iteration: records a "started" checkpoint, delegates to
/// the inner loop, then always records a final checkpoint + progress entry
/// (success or failure) so every run leaves a handoff trail.
///
/// # Errors
/// `NotReady` when no chat service is configured; otherwise whatever the
/// inner iteration or the memory backend reports.
pub async fn run_coding_iteration(
&self,
request: CodingRunRequest,
) -> Result<CodingRunResult, HarnessError> {
let chat = self
.chat
.as_ref()
.ok_or_else(|| HarnessError::not_ready("chat service is not configured in harness"))?;
let started_at = SystemTime::now();
// Checkpoint the run start before any work so a crash is still visible.
self.memory
.record_run_checkpoint(
&request.session.id,
RunCheckpoint::started(request.run_id.clone()),
)
.await?;
let result = self.run_coding_iteration_inner(chat, &request).await;
// Map the inner outcome onto a final status + handoff note.
match &result {
Ok(value) => {
let (status, note) = if value.no_pending_features {
(
RunStatus::Succeeded,
"All required features pass=true in feature_list; completion gate satisfied"
.to_string(),
)
} else if value.validated {
(
RunStatus::Succeeded,
format!(
"Feature '{}' validated and marked passing; remaining required features still pending",
value
.selected_feature_id
.clone()
.unwrap_or_else(|| "unknown".to_string())
),
)
} else {
(
RunStatus::Failed,
format!(
"Feature '{}' was not validated; left failing for next run",
value
.selected_feature_id
.clone()
.unwrap_or_else(|| "unknown".to_string())
),
)
};
self.record_final_handoff(&request, started_at, status, note)
.await?;
}
Err(error) => {
// Even on error, persist a failed checkpoint and progress entry.
self.record_final_handoff(
&request,
started_at,
RunStatus::Failed,
format!("Run failed: {}", error),
)
.await?;
}
}
result
}
/// Builds the default six-feature scaffold used when an initializer request
/// carries no feature list; every record starts with `passes = false` (see
/// the `feature` helper).
pub fn starter_feature_list(&self, objective: &str) -> Vec<FeatureRecord> {
vec![
feature(
"initializer.artifacts",
"functional",
format!("Initializer artifacts exist for objective: {objective}"),
[
"Create init script metadata",
"Create session manifest",
"Create starter feature list",
],
),
feature(
"harness.baseline",
"functional",
"Baseline harness checks can run before coding iterations",
[
"Run startup script",
"Verify workspace status is readable",
"Record baseline in progress log",
],
),
feature(
"chat.turn",
"functional",
"Chat turn execution path is available",
[
"Create a chat session",
"Run one non-streaming turn",
"Persist transcript messages",
],
),
feature(
"chat.streaming",
"functional",
"Streaming turn execution emits expected events",
[
"Run one streaming turn",
"Observe text/tool events",
"Observe terminal turn completion",
],
),
feature(
"tool.loop",
"functional",
"Tool loop executes and feeds results back into model",
[
"Register at least one tool",
"Execute tool call during turn",
"Confirm follow-up completion",
],
),
feature(
"quality.regression",
"quality",
"Regression test pass status is tracked",
[
"Run crate-level tests",
"Capture failures in progress log",
"Only mark feature pass after verification",
],
),
]
}
/// Core coding loop: health check, completion-gate short-circuit, feature
/// selection, then up to `max_turns_per_run` chat turns with a shared
/// `retry_budget` spent on retryable chat errors and validation failures.
async fn run_coding_iteration_inner(
&self,
chat: &ChatService,
request: &CodingRunRequest,
) -> Result<CodingRunResult, HarnessError> {
let bootstrap = self
.memory
.load_bootstrap_state(&request.session.id)
.await?;
let manifest = bootstrap.manifest.ok_or_else(|| {
HarnessError::not_ready("session is not initialized; run initializer first")
})?;
let init_script = manifest
.init_script
.as_deref()
.unwrap_or(Self::DEFAULT_INIT_SCRIPT);
// Health-check failures abort only when the fail-fast policy says so;
// otherwise they are silently tolerated.
if let Err(error) = self
.health_checker
.run(&request.session.id, init_script)
.await
{
if self.run_policy.fail_fast.on_health_check_error {
return Err(error);
}
}
// Completion gate: nothing to do once every feature passes.
if all_required_features_passed(&bootstrap.feature_list) {
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: None,
validated: true,
no_pending_features: true,
used_stream: request.stream,
assistant_message: None,
});
}
let feature = self.feature_selector.select(&bootstrap.feature_list);
let Some(feature) = feature else {
return Err(HarnessError::validation(
"feature selector returned no work before required features reached passes=true",
));
};
let mut turns_used = 0usize;
let mut retries_remaining = self.run_policy.retry_budget;
while turns_used < self.run_policy.max_turns_per_run {
turns_used += 1;
// Prompt is rebuilt each turn; the override wins when present.
let prompt = request
.prompt_override
.clone()
.unwrap_or_else(|| build_feature_prompt(&feature, &manifest.current_objective));
let turn_request = if request.stream {
ChatTurnRequest::builder(request.session.clone(), prompt)
.enable_streaming()
.build()
} else {
ChatTurnRequest::builder(request.session.clone(), prompt).build()
};
let turn_result = match self.execute_turn(chat, turn_request).await {
Ok(result) => result,
Err(error) => {
// Chat errors surface immediately under fail-fast or when
// budgets (retries or turns) are exhausted.
if self.run_policy.fail_fast.on_chat_error
|| retries_remaining == 0
|| turns_used >= self.run_policy.max_turns_per_run
{
return Err(error);
}
retries_remaining -= 1;
continue;
}
};
let validated = self.validator.validate(&feature, &turn_result).await?;
if validated {
// Persist the pass flag, then re-read to evaluate the gate.
self.memory
.update_feature_pass(&request.session.id, &feature.id, true)
.await?;
let all_features_passing = self
.session_all_required_features_passed(&request.session.id)
.await?;
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id.clone()),
validated: true,
no_pending_features: all_features_passing,
used_stream: request.stream,
assistant_message: Some(turn_result.assistant_message),
});
}
// Validation failure: a non-error result, returned (not raised)
// under fail-fast or exhausted budgets.
if self.run_policy.fail_fast.on_validation_failure
|| retries_remaining == 0
|| turns_used >= self.run_policy.max_turns_per_run
{
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id.clone()),
validated: false,
no_pending_features: false,
used_stream: request.stream,
assistant_message: Some(turn_result.assistant_message),
});
}
retries_remaining -= 1;
}
// Loop exit without a verdict: all turns consumed by retries.
Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id),
validated: false,
no_pending_features: false,
used_stream: request.stream,
assistant_message: None,
})
}
/// Executes one chat turn, bridging the streaming and non-streaming APIs
/// into a single `ChatTurnResult`.
///
/// # Errors
/// `Chat` when a stream event errors or the stream ends without a
/// `TurnComplete`; converted chat errors on the non-streaming path.
async fn execute_turn(
&self,
chat: &ChatService,
turn_request: ChatTurnRequest,
) -> Result<ChatTurnResult, HarnessError> {
if turn_request.stream {
let mut stream = chat.stream_turn(turn_request).await?;
let mut final_result = None;
// Drain the stream, keeping only the terminal TurnComplete payload;
// intermediate events are discarded here.
while let Some(item) = stream.next().await {
match item {
Ok(ChatEvent::TurnComplete(turn_result)) => final_result = Some(turn_result),
Ok(_) => {}
Err(err) => return Err(HarnessError::from(err)),
}
}
final_result
.ok_or_else(|| HarnessError::chat("stream ended without TurnComplete event"))
} else {
chat.run_turn(turn_request)
.await
.map_err(HarnessError::from)
}
}
/// Reloads the session's bootstrap state and reports whether every feature
/// in its (non-empty) feature list has `passes = true`.
async fn session_all_required_features_passed(
    &self,
    session_id: &SessionId,
) -> Result<bool, HarnessError> {
    let state = self.memory.load_bootstrap_state(session_id).await?;
    let all_passed = all_required_features_passed(&state.feature_list);
    Ok(all_passed)
}
/// Writes the terminal run checkpoint (with completion time and status) and
/// appends a matching progress-log entry carrying the same note.
async fn record_final_handoff(
&self,
request: &CodingRunRequest,
started_at: SystemTime,
status: RunStatus,
note: String,
) -> Result<(), HarnessError> {
self.memory
.record_run_checkpoint(
&request.session.id,
RunCheckpoint {
run_id: request.run_id.clone(),
started_at,
completed_at: Some(SystemTime::now()),
status,
// Same note lands in both checkpoint and progress log.
note: Some(note.clone()),
},
)
.await?;
self.memory
.append_progress_entry(
&request.session.id,
ProgressEntry::new(request.run_id.clone(), note),
)
.await?;
Ok(())
}
}
/// Builds a `FeatureRecord` from owned-or-convertible parts; new records
/// always start not passing.
fn feature(
    id: impl Into<String>,
    category: impl Into<String>,
    description: impl Into<String>,
    steps: impl IntoIterator<Item = impl Into<String>>,
) -> FeatureRecord {
    let steps: Vec<String> = steps.into_iter().map(Into::into).collect();
    FeatureRecord {
        id: id.into(),
        category: category.into(),
        description: description.into(),
        steps,
        passes: false,
    }
}
/// Validates an initializer feature list: it must be non-empty, every id
/// must be non-blank and unique, every feature needs a description and at
/// least one validation step, and no feature may start as already passing.
fn validate_feature_list(feature_list: &[FeatureRecord]) -> Result<(), HarnessError> {
    if feature_list.is_empty() {
        return Err(HarnessError::invalid_request(
            "feature_list must contain at least one feature",
        ));
    }
    // Track ids by reference; no need to clone for the uniqueness check.
    let mut seen: HashSet<&str> = HashSet::new();
    for feature in feature_list {
        if feature.id.trim().is_empty() {
            return Err(HarnessError::invalid_request(
                "feature_list entries require non-empty id",
            ));
        }
        if !seen.insert(feature.id.as_str()) {
            return Err(HarnessError::invalid_request(format!(
                "feature_list contains duplicate id '{}': ids must be unique",
                feature.id
            )));
        }
        if feature.description.trim().is_empty() {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' must include a non-empty description",
                feature.id
            )));
        }
        if feature.steps.is_empty() {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' must include at least one validation step",
                feature.id
            )));
        }
        if feature.passes {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' cannot start with passes=true during initializer phase",
                feature.id
            )));
        }
    }
    Ok(())
}
/// Renders the per-turn prompt: the objective, a fixed instruction line, and
/// the feature's id/category/description plus its steps as a "- " list.
fn build_feature_prompt(feature: &FeatureRecord, objective: &str) -> String {
    let mut steps = String::new();
    for (index, step) in feature.steps.iter().enumerate() {
        if index > 0 {
            steps.push('\n');
        }
        steps.push_str("- ");
        steps.push_str(step);
    }
    format!(
        "Objective: {objective}\n\nWork on one feature incrementally and leave a clean handoff.\n\nFeature: {}\nCategory: {}\nDescription: {}\nValidation steps:\n{}",
        feature.id, feature.category, feature.description, steps
    )
}
/// True only when the list is non-empty and every record has `passes = true`;
/// an empty list means "not yet initialized", never "done".
fn all_required_features_passed(feature_list: &[FeatureRecord]) -> bool {
    match feature_list {
        [] => false,
        features => features.iter().all(|feature| feature.passes),
    }
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use fchat::{ChatPolicy, InMemoryConversationStore};
use fmemory::InMemoryMemoryBackend;
use fprovider::{
Message, ModelProvider, ModelRequest, ModelResponse, OutputItem, ProviderFuture,
ProviderId, StopReason, StreamEvent, TokenUsage, ToolCall, VecEventStream,
};
use ftooling::{ToolError, ToolExecutionContext, ToolExecutionResult, ToolFuture, ToolRuntime};
use super::*;
/// Test double: provider that always returns a fixed assistant message
/// ("implemented" / "implemented-stream") and default usage.
#[derive(Debug)]
struct FakeProvider;
impl ModelProvider for FakeProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"implemented",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"implemented-stream",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
// One text delta followed by the terminal response event.
let stream = VecEventStream::new(vec![
Ok(StreamEvent::TextDelta("implemented-stream".to_string())),
Ok(StreamEvent::ResponseComplete(response)),
]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: provider that records every request it sees so tests can
/// inspect the prompt the harness produced.
#[derive(Debug, Default)]
struct RecordingProvider {
requests: Mutex<Vec<ModelRequest>>,
}
impl RecordingProvider {
// Panics if no request was recorded yet.
fn latest_request(&self) -> ModelRequest {
self.requests
.lock()
.expect("requests lock")
.last()
.cloned()
.expect("at least one request")
}
}
impl ModelProvider for RecordingProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
self.requests
.lock()
.expect("requests lock")
.push(request.clone());
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"recorded",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
self.requests
.lock()
.expect("requests lock")
.push(request.clone());
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"recorded-stream",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
let stream = VecEventStream::new(vec![Ok(StreamEvent::ResponseComplete(response))]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: provider that first emits a tool call, then (once tool
/// results are present in the request) a final "tool-complete" message —
/// exercising the tool loop.
#[derive(Debug)]
struct ToolLoopProvider;
impl ModelProvider for ToolLoopProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
// No tool results yet -> request a tool call; otherwise finish.
if request.tool_results.is_empty() {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::ToolCall(ToolCall {
id: "call_tool_1".to_string(),
name: "echo".to_string(),
arguments: "{}".to_string(),
})],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
} else {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"tool-complete",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
}
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"tool-complete",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
let stream = VecEventStream::new(vec![Ok(StreamEvent::ResponseComplete(response))]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: tool runtime that answers every call with output "ok".
#[derive(Debug, Default)]
struct EchoToolRuntime;
impl ToolRuntime for EchoToolRuntime {
fn execute<'a>(
&'a self,
tool_call: ToolCall,
_context: ToolExecutionContext,
) -> ToolFuture<'a, Result<ToolExecutionResult, ToolError>> {
Box::pin(async move {
Ok(ToolExecutionResult {
tool_call_id: tool_call.id,
output: "ok".to_string(),
})
})
}
}
/// Test double: selects the last (not first) pending feature.
#[derive(Debug, Default)]
struct LastPendingFeatureSelector;
impl FeatureSelector for LastPendingFeatureSelector {
fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
feature_list
.iter()
.rev()
.find(|feature| !feature.passes)
.cloned()
}
}
/// Test double: health checker that counts invocations and always passes.
#[derive(Debug, Default)]
struct RecordingHealthChecker {
calls: Mutex<u32>,
}
impl HealthChecker for RecordingHealthChecker {
fn run<'a>(
&'a self,
_session_id: &'a SessionId,
_init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>> {
Box::pin(async move {
*self.calls.lock().expect("calls lock") += 1;
Ok(())
})
}
}
/// Test double: validator that rejects every turn result.
struct AlwaysFailValidator;
impl OutcomeValidator for AlwaysFailValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
Box::pin(async { Ok(false) })
}
}
/// Test double: selector that never yields work.
#[derive(Debug, Default)]
struct NeverSelectFeature;
impl FeatureSelector for NeverSelectFeature {
fn select(&self, _feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
None
}
}
/// Test double: validator that fails until its Nth call, then passes —
/// used to exercise the retry budget.
#[derive(Debug, Default)]
struct EventuallyPassingValidator {
calls: Mutex<usize>,
pass_on_call: usize,
}
impl EventuallyPassingValidator {
fn new(pass_on_call: usize) -> Self {
Self {
calls: Mutex::new(0),
pass_on_call,
}
}
}
impl OutcomeValidator for EventuallyPassingValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
Box::pin(async move {
let mut calls = self.calls.lock().expect("calls lock");
*calls += 1;
Ok(*calls >= self.pass_on_call)
})
}
}
/// Test double: provider whose `complete` fails with a timeout for the
/// first N attempts, then succeeds; `stream` is intentionally unsupported.
#[derive(Debug, Default)]
struct FlakyCompletionProvider {
attempts: Mutex<usize>,
fail_for_attempts: usize,
}
impl FlakyCompletionProvider {
fn new(fail_for_attempts: usize) -> Self {
Self {
attempts: Mutex::new(0),
fail_for_attempts,
}
}
}
impl ModelProvider for FlakyCompletionProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
let mut attempts = self.attempts.lock().expect("attempts lock");
*attempts += 1;
if *attempts <= self.fail_for_attempts {
return Err(fprovider::ProviderError::timeout("transient failure"));
}
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"eventual-success",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
_request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async {
Err(fprovider::ProviderError::invalid_request(
"stream not used in flaky completion provider",
))
})
}
}
/// Assembles a `Harness` backed by the fake chat stack, optionally
/// attaching a health checker and/or an outcome validator.
fn build_harness(
    memory: Arc<dyn MemoryBackend>,
    health_checker: Option<Arc<dyn HealthChecker>>,
    validator: Option<Arc<dyn OutcomeValidator>>,
) -> Harness {
    let chat = Arc::new(
        ChatService::builder(Arc::new(FakeProvider))
            .store(Arc::new(InMemoryConversationStore::new()))
            .policy(ChatPolicy::default())
            .build(),
    );
    let mut harness = Harness::new(memory).with_chat(chat);
    if let Some(checker) = health_checker {
        harness = harness.with_health_checker(checker);
    }
    if let Some(validator) = validator {
        harness = harness.with_validator(validator);
    }
    harness
}
/// Runs the initializer with a single pending feature so coding-phase
/// tests start from an initialized session.
async fn initialize_for_tests(harness: &Harness, session_id: &str) {
    let feature = FeatureRecord {
        id: "feature-1".to_string(),
        category: "functional".to_string(),
        description: "build one feature".to_string(),
        steps: vec!["make it work".to_string()],
        passes: false,
    };
    let request = InitializerRequest::new(session_id, "run-init", "prepare coding run")
        .with_feature_list(vec![feature]);
    harness
        .run_initializer(request)
        .await
        .expect("initializer should succeed");
}
// First run of the initializer must create bootstrap state, persist the
// feature list, and store the init script inside the session manifest.
#[tokio::test]
async fn initializer_creates_bootstrap_state_on_first_run() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory.clone());
    let feature = FeatureRecord {
        id: "feature-1".to_string(),
        category: "functional".to_string(),
        description: "initializer creates artifacts".to_string(),
        steps: vec!["write feature list".to_string()],
        passes: false,
    };
    let request = InitializerRequest::new("session-1", "run-1", "Build initializer flow")
        .with_init_script("#!/usr/bin/env bash\necho start")
        .with_feature_list(vec![feature]);
    let outcome = harness
        .run_initializer(request)
        .await
        .expect("initializer should succeed");
    // Fresh session: created with exactly the one supplied feature.
    assert!(outcome.created);
    assert_eq!(outcome.feature_count, 1);
    assert_eq!(
        outcome.schema_version,
        SessionManifest::DEFAULT_SCHEMA_VERSION
    );
    // The persisted manifest must retain the init script.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-1"))
        .await
        .expect("bootstrap should load");
    let manifest = state.manifest.expect("manifest should exist");
    assert!(manifest.init_script.is_some());
}
// Re-initializing an already-initialized session must be a no-op: the
// original feature list survives and `created` is reported as false.
#[tokio::test]
async fn initializer_is_idempotent_when_session_already_initialized() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory);
    // Builds a one-feature request against the same session.
    let request_for = |run: &str, objective: &str, feature_id: &str, description: &str| {
        InitializerRequest::new("session-2", run, objective).with_feature_list(vec![
            FeatureRecord {
                id: feature_id.to_string(),
                category: "functional".to_string(),
                description: description.to_string(),
                steps: vec!["step".to_string()],
                passes: false,
            },
        ])
    };
    let first_result = harness
        .run_initializer(request_for("run-1", "Initialize", "feature-a", "first"))
        .await
        .expect("first init should succeed");
    assert!(first_result.created);
    assert_eq!(first_result.feature_count, 1);
    // The second run proposes a different feature, which must be ignored.
    let second_result = harness
        .run_initializer(request_for(
            "run-2",
            "Should not overwrite",
            "feature-b",
            "second",
        ))
        .await
        .expect("second init should succeed");
    assert!(!second_result.created);
    assert_eq!(second_result.feature_count, 1);
}
#[tokio::test]
async fn initializer_rejects_empty_objective() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let harness = Harness::new(memory);
let request = InitializerRequest::new("session-3", "run-1", " ");
let error = harness
.run_initializer(request)
.await
.expect_err("initializer should fail");
assert_eq!(error.kind, HarnessErrorKind::InvalidRequest);
}
#[tokio::test]
async fn initializer_generates_starter_feature_list_when_missing() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let harness = Harness::new(memory);
let request = InitializerRequest::new("session-4", "run-1", "Build coding harness");
let result = harness
.run_initializer(request)
.await
.expect("initializer should succeed");
assert!(result.created);
assert!(result.feature_count >= 4);
}
// The initializer must reject feature lists containing duplicate ids or
// features already marked as passing.
#[tokio::test]
async fn initializer_rejects_duplicate_or_passing_features() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory);
    // Two records share the id "dup".
    let duplicate_features = vec![
        FeatureRecord {
            id: "dup".to_string(),
            category: "functional".to_string(),
            description: "first".to_string(),
            steps: vec!["step".to_string()],
            passes: false,
        },
        FeatureRecord {
            id: "dup".to_string(),
            category: "functional".to_string(),
            description: "second".to_string(),
            steps: vec!["step".to_string()],
            passes: false,
        },
    ];
    // A throwaway harness is used so the duplicate-id failure cannot
    // interfere with the passing-feature scenario below.
    let duplicate_error = Harness::new(Arc::new(InMemoryMemoryBackend::new()))
        .run_initializer(
            InitializerRequest::new("session-5", "run-1", "Init")
                .with_feature_list(duplicate_features),
        )
        .await
        .expect_err("duplicate ids should fail");
    assert_eq!(duplicate_error.kind, HarnessErrorKind::InvalidRequest);
    // A feature submitted with `passes: true` must also be rejected.
    let passing_error = harness
        .run_initializer(
            InitializerRequest::new("session-6", "run-1", "Init").with_feature_list(vec![
                FeatureRecord {
                    id: "done".to_string(),
                    category: "functional".to_string(),
                    description: "already done".to_string(),
                    steps: vec!["step".to_string()],
                    passes: true,
                },
            ]),
        )
        .await
        .expect_err("pre-passing feature should fail");
    assert_eq!(passing_error.kind, HarnessErrorKind::InvalidRequest);
}
// Full happy path: the health check runs ("gets bearings"), the chat turn
// executes, validation passes, and the selected feature plus this run's
// progress/checkpoint records are persisted.
#[tokio::test]
async fn coding_iteration_gets_bearings_executes_and_marks_feature_passed() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let health = Arc::new(RecordingHealthChecker::default());
    let harness = build_harness(memory.clone(), Some(health.clone()), None);
    initialize_for_tests(&harness, "session-coding").await;
    let session = ChatSession::new("session-coding", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-code-1"))
        .await
        .expect("coding run should succeed");
    // The single seeded feature was validated, so none remain pending.
    assert!(result.no_pending_features);
    assert!(result.validated);
    assert_eq!(result.selected_feature_id.as_deref(), Some("feature-1"));
    // The health checker must have been consulted exactly once.
    let calls = health.calls.lock().expect("calls lock");
    assert_eq!(*calls, 1);
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-coding"))
        .await
        .expect("state should load");
    // Persisted side effects: feature marked passed, a progress entry and
    // a completed checkpoint recorded under this run id.
    assert!(state.feature_list[0].passes);
    assert!(
        state
            .recent_progress
            .iter()
            .any(|entry| entry.run_id == "run-code-1")
    );
    assert!(state.checkpoints.iter().any(
        |checkpoint| checkpoint.run_id == "run-code-1" && checkpoint.completed_at.is_some()
    ));
}
// The streaming code path must produce a validated run and still record
// the run's progress entry in the bootstrap state.
#[tokio::test]
async fn coding_iteration_stream_path_works_and_records_handoff() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None);
    initialize_for_tests(&harness, "session-stream").await;
    // Request the streaming variant of the coding iteration.
    let request = CodingRunRequest::new(
        ChatSession::new("session-stream", ProviderId::OpenAi, "gpt-4o-mini"),
        "run-stream-1",
    )
    .enable_streaming();
    let outcome = harness
        .run_coding_iteration(request)
        .await
        .expect("streaming coding run should succeed");
    assert!(outcome.used_stream);
    assert!(outcome.validated);
    // The streamed run must still leave a progress entry behind.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-stream"))
        .await
        .expect("state should load");
    let recorded = state
        .recent_progress
        .iter()
        .any(|entry| entry.run_id == "run-stream-1");
    assert!(recorded);
}
// When validation fails, the selected feature must stay pending.
#[tokio::test]
async fn coding_iteration_does_not_mark_feature_when_not_validated() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // A validator that always fails means no feature may be marked passed.
    let harness = build_harness(memory.clone(), None, Some(Arc::new(AlwaysFailValidator)));
    initialize_for_tests(&harness, "session-unvalidated").await;
    let chat_session =
        ChatSession::new("session-unvalidated", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-code-2"))
        .await
        .expect("coding run should complete");
    assert!(!outcome.validated);
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-unvalidated"))
        .await
        .expect("state should load");
    // The lone feature remains unpassed after the failed validation.
    assert!(!state.feature_list[0].passes);
}
// The builder must wire provider, tool runtime, memory, and chat together
// so a tool-using coding run completes end to end.
#[tokio::test]
async fn builder_wires_provider_tooling_memory_and_chat() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Wire a tool-looping provider with an echo tool runtime.
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(ToolLoopProvider))
        .tool_runtime(Arc::new(EchoToolRuntime))
        .build()
        .expect("builder should wire runtime");
    initialize_for_tests(&harness, "session-builder").await;
    let chat_session = ChatSession::new("session-builder", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-builder-1"))
        .await
        .expect("coding run should succeed");
    assert_eq!(outcome.assistant_message.as_deref(), Some("tool-complete"));
    // The tool loop is expected to persist three transcript messages.
    let transcript = memory
        .load_transcript_messages(&SessionId::from("session-builder"))
        .await
        .expect("transcript should load");
    assert_eq!(transcript.len(), 3);
}
// `Harness::run` should route an uninitialized session to the initializer
// phase first, then route the next run of the same session to coding.
#[tokio::test]
async fn runtime_run_selects_initializer_then_coding_phase() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = ChatSession::new("session-runtime", ProviderId::OpenAi, "gpt-4o-mini");
    let request =
        RuntimeRunRequest::new(session.clone(), "run-auto-1", "phase selector objective")
            .with_feature_list(vec![FeatureRecord {
                id: "feature-1".to_string(),
                category: "functional".to_string(),
                description: "phase selection".to_string(),
                steps: vec!["initialize then code".to_string()],
                passes: false,
            }]);
    let first = harness.run(request).await.expect("first phase should run");
    assert!(matches!(first, RuntimeRunOutcome::Initializer(_)));
    // The session is now initialized, so the second run must enter coding.
    let second = harness
        .run(RuntimeRunRequest::new(
            session,
            "run-auto-2",
            "phase selector objective",
        ))
        .await
        .expect("second phase should run");
    assert!(matches!(second, RuntimeRunOutcome::Coding(_)));
}
// A custom feature-selection strategy decides which pending feature a
// coding iteration works on; here the last pending feature must win.
#[tokio::test]
async fn coding_iteration_uses_feature_selection_strategy() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None)
        .with_feature_selector(Arc::new(LastPendingFeatureSelector));
    // Seed two pending features so the selector has a real choice.
    harness
        .run_initializer(
            InitializerRequest::new("session-selector", "run-init", "feature strategy")
                .with_feature_list(vec![
                    FeatureRecord {
                        id: "feature-a".to_string(),
                        category: "functional".to_string(),
                        description: "first pending".to_string(),
                        steps: vec!["do first".to_string()],
                        passes: false,
                    },
                    FeatureRecord {
                        id: "feature-b".to_string(),
                        category: "functional".to_string(),
                        description: "second pending".to_string(),
                        steps: vec!["do second".to_string()],
                        passes: false,
                    },
                ]),
        )
        .await
        .expect("initializer should succeed");
    let session = ChatSession::new("session-selector", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-selector-1"))
        .await
        .expect("coding run should succeed");
    // The last-pending strategy must have picked feature-b over feature-a.
    assert_eq!(result.selected_feature_id.as_deref(), Some("feature-b"));
}
#[tokio::test]
async fn builder_requires_provider_to_build_runtime() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let error = Harness::builder(memory)
.build()
.err()
.expect("provider should be required");
assert_eq!(error.kind, HarnessErrorKind::NotReady);
}
// `select_phase` must report Initializer for a fresh session and Coding
// once the same session has been initialized.
#[tokio::test]
async fn select_phase_tracks_session_initialization_state() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = SessionId::from("session-phase");
    // Before initialization: the initializer phase is selected.
    let phase_before = harness
        .select_phase(&session)
        .await
        .expect("phase should resolve");
    assert_eq!(phase_before, HarnessPhase::Initializer);
    harness
        .run_initializer(InitializerRequest::new(
            "session-phase",
            "run-init",
            "phase objective",
        ))
        .await
        .expect("initializer should succeed");
    // After initialization: the same session resolves to coding.
    let phase_after = harness
        .select_phase(&session)
        .await
        .expect("phase should resolve");
    assert_eq!(phase_after, HarnessPhase::Coding);
}
// A runtime run that lands in the initializer phase must apply every
// initializer-specific request field: active branch, init script,
// progress summary, and feature list.
#[tokio::test]
async fn runtime_run_initializer_applies_initializer_fields() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = ChatSession::new("session-init-fields", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run(
            RuntimeRunRequest::new(session, "run-init-fields", "objective")
                .with_active_branch("feature/custom")
                .with_init_script("#!/usr/bin/env bash\necho init")
                .with_progress_summary("custom summary")
                .with_feature_list(vec![FeatureRecord {
                    id: "feature-custom".to_string(),
                    category: "functional".to_string(),
                    description: "custom feature".to_string(),
                    steps: vec!["step".to_string()],
                    passes: false,
                }]),
        )
        .await
        .expect("runtime run should initialize");
    assert!(matches!(outcome, RuntimeRunOutcome::Initializer(_)));
    // The persisted manifest must reflect the branch and init script, and
    // the custom summary must appear in recent progress.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-init-fields"))
        .await
        .expect("state should load");
    let manifest = state.manifest.expect("manifest should exist");
    assert_eq!(manifest.active_branch, "feature/custom");
    assert_eq!(
        manifest.init_script.as_deref(),
        Some("#!/usr/bin/env bash\necho init")
    );
    assert!(
        state
            .recent_progress
            .iter()
            .any(|entry| entry.summary == "custom summary")
    );
}
// A coding-phase runtime run must forward both the prompt override and
// the streaming flag down to the model provider's request.
#[tokio::test]
async fn runtime_run_forwards_prompt_override_and_streaming() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // RecordingProvider captures the last ModelRequest for inspection.
    let provider = Arc::new(RecordingProvider::default());
    let harness = Harness::builder(memory.clone())
        .provider(provider.clone())
        .build()
        .expect("builder should succeed");
    harness
        .run_initializer(
            InitializerRequest::new("session-runtime-prompt", "run-init", "objective")
                .with_feature_list(vec![FeatureRecord {
                    id: "feature-1".to_string(),
                    category: "functional".to_string(),
                    description: "check prompt override".to_string(),
                    steps: vec!["override prompt".to_string()],
                    passes: false,
                }]),
        )
        .await
        .expect("initializer should succeed");
    let session = ChatSession::new("session-runtime-prompt", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run(
            RuntimeRunRequest::new(session, "run-code", "objective")
                .with_prompt_override("explicit prompt")
                .enable_streaming(),
        )
        .await
        .expect("runtime run should code");
    assert!(matches!(outcome, RuntimeRunOutcome::Coding(_)));
    // The provider must have seen the streaming flag, and the overridden
    // prompt must be the final user message of the request.
    let request = provider.latest_request();
    assert!(request.stream);
    let last_message = request.messages.last().expect("user message should exist");
    assert_eq!(last_message.role, fprovider::Role::User);
    assert_eq!(last_message.content, "explicit prompt");
}
#[tokio::test]
async fn run_policy_enforces_strict_incremental_feature_limit() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let error = Harness::builder(memory)
.provider(Arc::new(FakeProvider))
.run_policy(RunPolicy {
max_turns_per_run: 1,
max_features_per_run: 2,
retry_budget: 0,
fail_fast: FailFastPolicy::default(),
})
.build()
.err()
.expect("policy should reject non-incremental feature count");
assert_eq!(error.kind, HarnessErrorKind::InvalidRequest);
}
// With a retry budget and fail-fast disabled for validation failures, a
// validator that first passes on its second call must still yield a
// validated run.
#[tokio::test]
async fn coding_iteration_retries_validation_when_policy_allows() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Validator fails on call 1 and passes on call 2.
    let harness = build_harness(
        memory.clone(),
        None,
        Some(Arc::new(EventuallyPassingValidator::new(2))),
    )
    .with_run_policy(RunPolicy {
        max_turns_per_run: 3,
        max_features_per_run: 1,
        retry_budget: 2,
        fail_fast: FailFastPolicy {
            on_validation_failure: false,
            ..FailFastPolicy::default()
        },
    })
    .expect("run policy should be accepted");
    initialize_for_tests(&harness, "session-retry-validation").await;
    let session = ChatSession::new(
        "session-retry-validation",
        ProviderId::OpenAi,
        "gpt-4o-mini",
    );
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-retry-validation"))
        .await
        .expect("coding run should succeed after retry");
    assert!(result.validated);
}
// The turn cap must stop the run even when retries are still available.
#[tokio::test]
async fn coding_iteration_stops_when_turn_budget_is_exhausted() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // One turn only, but a generous retry budget: the turn cap wins.
    let policy = RunPolicy {
        max_turns_per_run: 1,
        max_features_per_run: 1,
        retry_budget: 3,
        fail_fast: FailFastPolicy {
            on_validation_failure: false,
            ..FailFastPolicy::default()
        },
    };
    let harness = build_harness(memory.clone(), None, Some(Arc::new(AlwaysFailValidator)))
        .with_run_policy(policy)
        .expect("run policy should be accepted");
    initialize_for_tests(&harness, "session-turn-budget").await;
    let chat_session =
        ChatSession::new("session-turn-budget", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-turn-budget"))
        .await
        .expect("coding run should complete with validation failure");
    // The budget is spent on a failing validator, so the run ends unvalidated.
    assert!(!outcome.validated);
}
// A transient provider failure must be retried when the run policy's
// retry budget covers it and chat-error fail-fast is disabled.
#[tokio::test]
async fn coding_iteration_retries_chat_errors_within_retry_budget() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Provider fails its first completion, then succeeds.
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FlakyCompletionProvider::new(1)))
        .validator(Arc::new(AcceptAllValidator))
        .run_policy(RunPolicy {
            max_turns_per_run: 3,
            max_features_per_run: 1,
            retry_budget: 1,
            fail_fast: FailFastPolicy {
                on_chat_error: false,
                ..FailFastPolicy::default()
            },
        })
        .build()
        .expect("builder should succeed");
    initialize_for_tests(&harness, "session-chat-retry").await;
    let session = ChatSession::new("session-chat-retry", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-chat-retry"))
        .await
        .expect("chat error should be retried successfully");
    assert!(result.validated);
}
// A selector that picks nothing while features are still pending must not
// make the harness declare the session complete.
#[tokio::test]
async fn harness_does_not_declare_done_when_selector_returns_none_early() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // The selector declines every feature even though one is pending.
    let harness = build_harness(memory.clone(), None, None)
        .with_feature_selector(Arc::new(NeverSelectFeature));
    initialize_for_tests(&harness, "session-no-early-done").await;
    let chat_session =
        ChatSession::new("session-no-early-done", ProviderId::OpenAi, "gpt-4o-mini");
    let error = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-no-early-done"))
        .await
        .expect_err("selector returning none should fail completion gate");
    // "Nothing selected" must surface as a validation error, not success.
    assert_eq!(error.kind, HarnessErrorKind::Validation);
}
// With two pending features and (at most) one completed per iteration,
// the run validates but must not report the session as fully complete.
#[tokio::test]
async fn completion_gate_requires_all_features_to_pass_true() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None);
    // Seed two pending features so one always remains after the run.
    harness
        .run_initializer(
            InitializerRequest::new("session-completion-gate", "run-init", "completion gate")
                .with_feature_list(vec![
                    FeatureRecord {
                        id: "feature-1".to_string(),
                        category: "functional".to_string(),
                        description: "first required feature".to_string(),
                        steps: vec!["implement 1".to_string()],
                        passes: false,
                    },
                    FeatureRecord {
                        id: "feature-2".to_string(),
                        category: "functional".to_string(),
                        description: "second required feature".to_string(),
                        steps: vec!["implement 2".to_string()],
                        passes: false,
                    },
                ]),
        )
        .await
        .expect("initializer should succeed");
    let session =
        ChatSession::new("session-completion-gate", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-completion-gate"))
        .await
        .expect("coding run should succeed");
    // The iteration validated, but a feature is still pending.
    assert!(result.validated);
    assert!(!result.no_pending_features);
}
}