use std::collections::HashSet;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use std::time::SystemTime;
use fchat::{
ChatError, ChatEvent, ChatPolicy, ChatService, ChatSession, ChatTurnRequest, ChatTurnResult,
};
use fcommon::{BoxFuture, SessionId};
use fmemory::{
FeatureRecord, MemoryBackend, MemoryConversationStore, MemoryError, ProgressEntry,
RunCheckpoint, RunStatus, SessionManifest,
};
use fprovider::ModelProvider;
use ftooling::ToolRuntime;
use futures_util::StreamExt;
/// Coarse classification of a [`HarnessError`], letting callers branch on
/// the failure cause without parsing the message text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HarnessErrorKind {
/// The caller supplied an invalid or inconsistent request/policy.
InvalidRequest,
/// A memory-backend (persistence) operation failed.
Memory,
/// The chat service returned an error or an incomplete stream.
Chat,
/// Outcome validation could not proceed (e.g. no feature selected).
Validation,
/// The pre-run health check failed.
HealthCheck,
/// A required collaborator (provider, chat service, manifest) is missing.
NotReady,
}
/// Error type for all harness operations: a [`HarnessErrorKind`] plus a
/// human-readable message. Displayed as `Kind: message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HarnessError {
// Machine-checkable category of the failure.
pub kind: HarnessErrorKind,
// Human-readable detail; not intended for programmatic matching.
pub message: String,
}
impl HarnessError {
    /// Builds an error from an explicit kind and message.
    pub fn new(kind: HarnessErrorKind, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { kind, message }
    }
    /// Shorthand for an `InvalidRequest` error.
    pub fn invalid_request(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::InvalidRequest, message)
    }
    /// Shorthand for a `Memory` (persistence) error.
    pub fn memory(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Memory, message)
    }
    /// Shorthand for a `Chat` error.
    pub fn chat(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Chat, message)
    }
    /// Shorthand for a `Validation` error.
    pub fn validation(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::Validation, message)
    }
    /// Shorthand for a `HealthCheck` error.
    pub fn health_check(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::HealthCheck, message)
    }
    /// Shorthand for a `NotReady` error.
    pub fn not_ready(message: impl Into<String>) -> Self {
        Self::new(HarnessErrorKind::NotReady, message)
    }
}
impl Display for HarnessError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}: {}", self.kind, self.message)
}
}
impl Error for HarnessError {}
impl From<MemoryError> for HarnessError {
fn from(value: MemoryError) -> Self {
HarnessError::memory(value.message)
}
}
impl From<ChatError> for HarnessError {
fn from(value: ChatError) -> Self {
HarnessError::chat(value.to_string())
}
}
/// Input for the initializer phase; refine defaults via the `with_*` builders.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InitializerRequest {
pub session_id: SessionId,
pub run_id: String,
// Branch recorded in the session manifest; defaults to "feature/initializer".
pub active_branch: String,
pub current_objective: String,
// Optional startup script; `Harness::DEFAULT_INIT_SCRIPT` is used when None.
pub init_script: Option<String>,
// Seed features; when empty the harness generates a starter list.
pub feature_list: Vec<FeatureRecord>,
pub progress_summary: String,
}
impl InitializerRequest {
    /// Creates a request with scaffold defaults for branch, script, feature
    /// list, and progress summary.
    pub fn new(
        session_id: impl Into<SessionId>,
        run_id: impl Into<String>,
        current_objective: impl Into<String>,
    ) -> Self {
        Self {
            session_id: session_id.into(),
            run_id: run_id.into(),
            active_branch: String::from("feature/initializer"),
            current_objective: current_objective.into(),
            init_script: None,
            feature_list: Vec::new(),
            progress_summary: String::from("Initializer scaffold created"),
        }
    }
    /// Overrides the branch recorded in the manifest.
    pub fn with_active_branch(self, active_branch: impl Into<String>) -> Self {
        Self {
            active_branch: active_branch.into(),
            ..self
        }
    }
    /// Supplies an explicit init script instead of the default.
    pub fn with_init_script(self, init_script: impl Into<String>) -> Self {
        Self {
            init_script: Some(init_script.into()),
            ..self
        }
    }
    /// Supplies an explicit seed feature list.
    pub fn with_feature_list(self, feature_list: Vec<FeatureRecord>) -> Self {
        Self {
            feature_list,
            ..self
        }
    }
    /// Overrides the first progress-log entry text.
    pub fn with_progress_summary(self, progress_summary: impl Into<String>) -> Self {
        Self {
            progress_summary: progress_summary.into(),
            ..self
        }
    }
}
/// Summary of the bootstrap state persisted (or found) by the initializer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InitializerResult {
pub session_id: SessionId,
// True if this run created the state; false if it already existed.
pub created: bool,
pub schema_version: u32,
pub harness_version: String,
// Number of features in the persisted feature list.
pub feature_count: usize,
}
/// Input for a single coding iteration against an initialized session.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodingRunRequest {
pub session: ChatSession,
pub run_id: String,
// When true, chat turns use the streaming path.
pub stream: bool,
// Replaces the generated feature prompt when set.
pub prompt_override: Option<String>,
}
impl CodingRunRequest {
    /// Creates a non-streaming request with no prompt override.
    pub fn new(session: ChatSession, run_id: impl Into<String>) -> Self {
        Self {
            session,
            run_id: run_id.into(),
            stream: false,
            prompt_override: None,
        }
    }
    /// Switches the run to the streaming chat path.
    pub fn enable_streaming(self) -> Self {
        Self {
            stream: true,
            ..self
        }
    }
    /// Replaces the generated feature prompt with an explicit one.
    pub fn with_prompt_override(self, prompt_override: impl Into<String>) -> Self {
        Self {
            prompt_override: Some(prompt_override.into()),
            ..self
        }
    }
}
/// Outcome of one coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodingRunResult {
pub session_id: SessionId,
// Feature worked on this run; None when no work was attempted.
pub selected_feature_id: Option<String>,
// True when the validator accepted the turn result.
pub validated: bool,
// True when every feature in the list has passes=true.
pub no_pending_features: bool,
pub used_stream: bool,
// Final assistant text, when a turn completed.
pub assistant_message: Option<String>,
}
/// Which phase `Harness::run` will execute, based on session initialization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HarnessPhase {
/// Session has no bootstrap state yet; run the initializer.
Initializer,
/// Session is initialized; run a coding iteration.
Coding,
}
/// Result of `Harness::run`, tagged by the phase that actually executed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RuntimeRunOutcome {
Initializer(InitializerResult),
Coding(CodingRunResult),
}
/// Phase-agnostic request for `Harness::run`; carries the union of fields
/// needed by either the initializer or a coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuntimeRunRequest {
pub session: ChatSession,
pub run_id: String,
pub current_objective: String,
// Streaming flag; used only by the coding phase.
pub stream: bool,
// Prompt override; used only by the coding phase.
pub prompt_override: Option<String>,
// Init script; used only by the initializer phase.
pub init_script: Option<String>,
// Seed feature list; used only by the initializer phase.
pub feature_list: Vec<FeatureRecord>,
pub active_branch: String,
pub progress_summary: Option<String>,
}
impl RuntimeRunRequest {
    /// Creates a request with streaming disabled and scaffold defaults for
    /// every optional field.
    pub fn new(
        session: ChatSession,
        run_id: impl Into<String>,
        current_objective: impl Into<String>,
    ) -> Self {
        Self {
            session,
            run_id: run_id.into(),
            current_objective: current_objective.into(),
            stream: false,
            prompt_override: None,
            init_script: None,
            feature_list: Vec::new(),
            active_branch: String::from("feature/initializer"),
            progress_summary: None,
        }
    }
    /// Switches a coding run to the streaming chat path.
    pub fn enable_streaming(self) -> Self {
        Self {
            stream: true,
            ..self
        }
    }
    /// Replaces the generated feature prompt (coding phase only).
    pub fn with_prompt_override(self, prompt_override: impl Into<String>) -> Self {
        Self {
            prompt_override: Some(prompt_override.into()),
            ..self
        }
    }
    /// Supplies an explicit init script (initializer phase only).
    pub fn with_init_script(self, init_script: impl Into<String>) -> Self {
        Self {
            init_script: Some(init_script.into()),
            ..self
        }
    }
    /// Supplies a seed feature list (initializer phase only).
    pub fn with_feature_list(self, feature_list: Vec<FeatureRecord>) -> Self {
        Self {
            feature_list,
            ..self
        }
    }
    /// Overrides the branch recorded in the manifest.
    pub fn with_active_branch(self, active_branch: impl Into<String>) -> Self {
        Self {
            active_branch: active_branch.into(),
            ..self
        }
    }
    /// Overrides the first progress-log entry text.
    pub fn with_progress_summary(self, progress_summary: impl Into<String>) -> Self {
        Self {
            progress_summary: Some(progress_summary.into()),
            ..self
        }
    }
}
/// Controls which failure classes abort a coding iteration immediately
/// instead of consuming the retry budget.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FailFastPolicy {
// Abort when the pre-run health check fails.
pub on_health_check_error: bool,
// Abort on a chat/provider error instead of retrying.
pub on_chat_error: bool,
// Abort on a validation failure instead of retrying.
pub on_validation_failure: bool,
}
impl Default for FailFastPolicy {
fn default() -> Self {
Self {
on_health_check_error: true,
on_chat_error: false,
on_validation_failure: true,
}
}
}
/// Budget and fail-fast configuration for a single coding iteration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunPolicy {
// Maximum chat turns per run; must be >= 1 (see `validate`).
pub max_turns_per_run: usize,
// Must be exactly 1 for strict incremental runs (see `validate`).
pub max_features_per_run: usize,
// Extra attempts allowed after a retryable failure.
pub retry_budget: usize,
pub fail_fast: FailFastPolicy,
}
impl Default for RunPolicy {
fn default() -> Self {
Self {
max_turns_per_run: 1,
max_features_per_run: 1,
retry_budget: 0,
fail_fast: FailFastPolicy::default(),
}
}
}
impl RunPolicy {
    /// Checks the policy invariants: at least one turn per run, and exactly
    /// one feature per run (strict incremental mode).
    ///
    /// # Errors
    /// Returns an `InvalidRequest` error when either invariant is violated.
    pub fn validate(&self) -> Result<(), HarnessError> {
        match (self.max_turns_per_run, self.max_features_per_run) {
            (0, _) => Err(HarnessError::invalid_request(
                "run policy requires max_turns_per_run >= 1",
            )),
            (_, features) if features != 1 => Err(HarnessError::invalid_request(
                "run policy requires max_features_per_run = 1 for strict incremental runs",
            )),
            _ => Ok(()),
        }
    }
}
/// Pre-run health probe executed before each coding iteration with the
/// session's init script text.
pub trait HealthChecker: Send + Sync {
fn run<'a>(
&'a self,
session_id: &'a SessionId,
init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>>;
}
/// Default health checker that always reports healthy without doing work.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopHealthChecker;
impl HealthChecker for NoopHealthChecker {
fn run<'a>(
&'a self,
_session_id: &'a SessionId,
_init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>> {
// Unconditionally healthy.
Box::pin(async { Ok(()) })
}
}
/// Judges whether a completed chat turn actually satisfied the selected
/// feature; `Ok(true)` marks the feature as passing.
pub trait OutcomeValidator: Send + Sync {
fn validate<'a>(
&'a self,
feature: &'a FeatureRecord,
result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>>;
}
/// Default validator that accepts every turn result unconditionally.
#[derive(Debug, Default, Clone, Copy)]
pub struct AcceptAllValidator;
impl OutcomeValidator for AcceptAllValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
// Always reports success.
Box::pin(async { Ok(true) })
}
}
/// Chooses which feature a coding iteration should work on, or `None` when
/// the selector finds no work.
pub trait FeatureSelector: Send + Sync {
fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord>;
}
/// Default selector: picks the first feature whose `passes` flag is false.
#[derive(Debug, Default, Clone, Copy)]
pub struct FirstPendingFeatureSelector;
impl FeatureSelector for FirstPendingFeatureSelector {
    /// Returns a clone of the first not-yet-passing record, in list order.
    fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
        for feature in feature_list {
            if !feature.passes {
                return Some(feature.clone());
            }
        }
        None
    }
}
/// Builder that wires a `ChatService` (provider + store + optional tool
/// runtime) into a [`Harness`]; see [`HarnessBuilder::build`].
pub struct HarnessBuilder {
memory: Arc<dyn MemoryBackend>,
// Required at build time; `build` fails without it.
provider: Option<Arc<dyn ModelProvider>>,
tool_runtime: Option<Arc<dyn ToolRuntime>>,
chat_policy: ChatPolicy,
health_checker: Arc<dyn HealthChecker>,
validator: Arc<dyn OutcomeValidator>,
feature_selector: Arc<dyn FeatureSelector>,
run_policy: RunPolicy,
schema_version: u32,
harness_version: String,
}
impl HarnessBuilder {
/// Starts a builder with default collaborators and no provider/tool runtime.
pub fn new(memory: Arc<dyn MemoryBackend>) -> Self {
Self {
memory,
provider: None,
tool_runtime: None,
chat_policy: ChatPolicy::default(),
health_checker: Arc::new(NoopHealthChecker),
validator: Arc::new(AcceptAllValidator),
feature_selector: Arc::new(FirstPendingFeatureSelector),
run_policy: RunPolicy::default(),
schema_version: SessionManifest::DEFAULT_SCHEMA_VERSION,
harness_version: SessionManifest::DEFAULT_HARNESS_VERSION.to_string(),
}
}
/// Sets the model provider (required by `build`).
pub fn provider(mut self, provider: Arc<dyn ModelProvider>) -> Self {
self.provider = Some(provider);
self
}
/// Sets an optional tool runtime forwarded to the chat service.
pub fn tool_runtime(mut self, tool_runtime: Arc<dyn ToolRuntime>) -> Self {
self.tool_runtime = Some(tool_runtime);
self
}
pub fn chat_policy(mut self, chat_policy: ChatPolicy) -> Self {
self.chat_policy = chat_policy;
self
}
pub fn health_checker(mut self, health_checker: Arc<dyn HealthChecker>) -> Self {
self.health_checker = health_checker;
self
}
pub fn validator(mut self, validator: Arc<dyn OutcomeValidator>) -> Self {
self.validator = validator;
self
}
pub fn feature_selector(mut self, feature_selector: Arc<dyn FeatureSelector>) -> Self {
self.feature_selector = feature_selector;
self
}
// NOTE(review): unlike `Harness::with_run_policy`, this setter does not
// validate; validation happens in `build`.
pub fn run_policy(mut self, run_policy: RunPolicy) -> Self {
self.run_policy = run_policy;
self
}
pub fn schema_version(mut self, schema_version: u32) -> Self {
self.schema_version = schema_version;
self
}
pub fn harness_version(mut self, harness_version: impl Into<String>) -> Self {
self.harness_version = harness_version.into();
self
}
/// Validates the run policy, then assembles a `ChatService` backed by a
/// `MemoryConversationStore` over the same memory backend.
///
/// # Errors
/// `InvalidRequest` when the run policy is invalid; `NotReady` when no
/// provider was supplied.
pub fn build(self) -> Result<Harness, HarnessError> {
self.run_policy.validate()?;
let provider = self
.provider
.ok_or_else(|| HarnessError::not_ready("provider is required to build chat runtime"))?;
// Conversation store shares the harness's memory backend.
let store = Arc::new(MemoryConversationStore::new(self.memory.clone()));
let mut chat_builder = ChatService::builder(provider)
.store(store)
.policy(self.chat_policy);
if let Some(tool_runtime) = self.tool_runtime {
chat_builder = chat_builder.tool_runtime(tool_runtime);
}
let chat = Arc::new(chat_builder.build());
Ok(Harness {
memory: self.memory,
chat: Some(chat),
health_checker: self.health_checker,
validator: self.validator,
feature_selector: self.feature_selector,
run_policy: self.run_policy,
schema_version: self.schema_version,
harness_version: self.harness_version,
})
}
}
/// Orchestrates the two-phase loop: a one-time initializer that persists
/// bootstrap state, then repeated single-feature coding iterations.
#[derive(Clone)]
pub struct Harness {
memory: Arc<dyn MemoryBackend>,
// None until configured via builder or `with_chat`; coding runs need it.
chat: Option<Arc<ChatService>>,
health_checker: Arc<dyn HealthChecker>,
validator: Arc<dyn OutcomeValidator>,
feature_selector: Arc<dyn FeatureSelector>,
run_policy: RunPolicy,
schema_version: u32,
harness_version: String,
}
impl Harness {
/// Fallback init script used when neither the request nor the session
/// manifest provides one.
pub const DEFAULT_INIT_SCRIPT: &'static str =
"#!/usr/bin/env bash\nset -e\npwd\ngit log --oneline -20\n";
/// Creates a harness with default collaborators and no chat service;
/// coding iterations require `with_chat` (or use the builder).
pub fn new(memory: Arc<dyn MemoryBackend>) -> Self {
Self {
memory,
chat: None,
health_checker: Arc::new(NoopHealthChecker),
validator: Arc::new(AcceptAllValidator),
feature_selector: Arc::new(FirstPendingFeatureSelector),
run_policy: RunPolicy::default(),
schema_version: SessionManifest::DEFAULT_SCHEMA_VERSION,
harness_version: SessionManifest::DEFAULT_HARNESS_VERSION.to_string(),
}
}
/// Convenience shorthand for [`HarnessBuilder::new`].
pub fn builder(memory: Arc<dyn MemoryBackend>) -> HarnessBuilder {
HarnessBuilder::new(memory)
}
pub fn with_chat(mut self, chat: Arc<ChatService>) -> Self {
self.chat = Some(chat);
self
}
pub fn with_health_checker(mut self, health_checker: Arc<dyn HealthChecker>) -> Self {
self.health_checker = health_checker;
self
}
pub fn with_validator(mut self, validator: Arc<dyn OutcomeValidator>) -> Self {
self.validator = validator;
self
}
pub fn with_feature_selector(mut self, feature_selector: Arc<dyn FeatureSelector>) -> Self {
self.feature_selector = feature_selector;
self
}
pub fn with_run_policy(mut self, run_policy: RunPolicy) -> Result<Self, HarnessError> {
run_policy.validate()?;
self.run_policy = run_policy;
Ok(self)
}
pub fn with_schema_version(mut self, schema_version: u32) -> Self {
self.schema_version = schema_version;
self
}
pub fn with_harness_version(mut self, harness_version: impl Into<String>) -> Self {
self.harness_version = harness_version.into();
self
}
/// Decides the phase for a session: `Coding` once the memory backend
/// reports it initialized, `Initializer` otherwise.
pub async fn select_phase(&self, session_id: &SessionId) -> Result<HarnessPhase, HarnessError> {
    let initialized = self.memory.is_initialized(session_id).await?;
    let phase = if initialized {
        HarnessPhase::Coding
    } else {
        HarnessPhase::Initializer
    };
    Ok(phase)
}
/// Entry point that dispatches on [`Harness::select_phase`]: translates the
/// phase-agnostic request into either an initializer request or a coding
/// request and runs the matching phase.
pub async fn run(&self, request: RuntimeRunRequest) -> Result<RuntimeRunOutcome, HarnessError> {
let phase = self.select_phase(&request.session.id).await?;
match phase {
HarnessPhase::Initializer => {
let mut initializer = InitializerRequest::new(
request.session.id.clone(),
request.run_id.clone(),
request.current_objective,
)
.with_active_branch(request.active_branch);
if let Some(init_script) = request.init_script {
initializer = initializer.with_init_script(init_script);
}
// Empty list keeps the InitializerRequest default so the starter
// list fallback in run_initializer still applies.
if !request.feature_list.is_empty() {
initializer = initializer.with_feature_list(request.feature_list);
}
if let Some(progress_summary) = request.progress_summary {
initializer = initializer.with_progress_summary(progress_summary);
}
self.run_initializer(initializer)
.await
.map(RuntimeRunOutcome::Initializer)
}
HarnessPhase::Coding => {
// Coding phase ignores init_script/feature_list/branch/summary.
let mut coding = CodingRunRequest::new(request.session, request.run_id);
if request.stream {
coding = coding.enable_streaming();
}
if let Some(prompt_override) = request.prompt_override {
coding = coding.with_prompt_override(prompt_override);
}
self.run_coding_iteration(coding)
.await
.map(RuntimeRunOutcome::Coding)
}
}
}
/// Runs the initializer phase: validates the request, assembles scaffold
/// artifacts (manifest, feature list, first progress entry, started run
/// checkpoint), persists them only when the session has no bootstrap state
/// yet, and returns a summary of what is actually stored.
///
/// # Errors
/// `InvalidRequest` for an empty objective or an invalid feature list;
/// `Memory` when persistence fails or the manifest is missing afterwards.
pub async fn run_initializer(
    &self,
    request: InitializerRequest,
) -> Result<InitializerResult, HarnessError> {
    let InitializerRequest {
        session_id,
        run_id,
        active_branch,
        current_objective,
        init_script,
        feature_list,
        progress_summary,
    } = request;
    if current_objective.trim().is_empty() {
        return Err(HarnessError::invalid_request(
            "current_objective must not be empty",
        ));
    }
    // Fall back to the starter feature list when the caller supplied none.
    // BUGFIX: the argument was corrupted to `¤t_objective` (an HTML
    // entity mangling of `&current_objective`), which does not compile.
    let feature_list = if feature_list.is_empty() {
        self.starter_feature_list(&current_objective)
    } else {
        feature_list
    };
    validate_feature_list(&feature_list)?;
    let progress_summary = if progress_summary.trim().is_empty() {
        format!("Initializer scaffold created for objective: {current_objective}")
    } else {
        progress_summary
    };
    let init_script = init_script.unwrap_or_else(|| Self::DEFAULT_INIT_SCRIPT.to_string());
    let mut manifest =
        SessionManifest::new(session_id.clone(), active_branch, current_objective)
            .with_schema_version(self.schema_version)
            .with_harness_version(self.harness_version.clone());
    manifest.init_script = Some(init_script);
    // Idempotent write: only persists when no bootstrap state exists yet.
    let created = self
        .memory
        .initialize_session_if_missing(
            &session_id,
            manifest,
            feature_list,
            Some(ProgressEntry::new(run_id.clone(), progress_summary)),
            Some(RunCheckpoint::started(run_id)),
        )
        .await?;
    // Re-read persisted state so the result reflects storage, not the input
    // (matters when the session already existed and nothing was written).
    let bootstrap = self.memory.load_bootstrap_state(&session_id).await?;
    let manifest = bootstrap
        .manifest
        .ok_or_else(|| HarnessError::memory("manifest missing after initializer run"))?;
    Ok(InitializerResult {
        session_id: manifest.session_id,
        created,
        schema_version: manifest.schema_version,
        harness_version: manifest.harness_version,
        feature_count: bootstrap.feature_list.len(),
    })
}
/// Runs one coding iteration: records a "started" checkpoint, delegates to
/// the inner loop, then always records a final checkpoint + progress entry
/// (success or failure) so every run leaves a handoff trail.
///
/// # Errors
/// `NotReady` when no chat service is configured; otherwise whatever the
/// inner iteration or the memory backend reports.
pub async fn run_coding_iteration(
&self,
request: CodingRunRequest,
) -> Result<CodingRunResult, HarnessError> {
let chat = self
.chat
.as_ref()
.ok_or_else(|| HarnessError::not_ready("chat service is not configured in harness"))?;
let started_at = SystemTime::now();
// Checkpoint the run start before any work so a crash is still visible.
self.memory
.record_run_checkpoint(
&request.session.id,
RunCheckpoint::started(request.run_id.clone()),
)
.await?;
let result = self.run_coding_iteration_inner(chat, &request).await;
// Map the inner outcome onto a final status + handoff note.
match &result {
Ok(value) => {
let (status, note) = if value.no_pending_features {
(
RunStatus::Succeeded,
"All required features pass=true in feature_list; completion gate satisfied"
.to_string(),
)
} else if value.validated {
(
RunStatus::Succeeded,
format!(
"Feature '{}' validated and marked passing; remaining required features still pending",
value
.selected_feature_id
.clone()
.unwrap_or_else(|| "unknown".to_string())
),
)
} else {
(
RunStatus::Failed,
format!(
"Feature '{}' was not validated; left failing for next run",
value
.selected_feature_id
.clone()
.unwrap_or_else(|| "unknown".to_string())
),
)
};
self.record_final_handoff(&request, started_at, status, note)
.await?;
}
Err(error) => {
// Even on error, persist a failed checkpoint and progress entry.
self.record_final_handoff(
&request,
started_at,
RunStatus::Failed,
format!("Run failed: {}", error),
)
.await?;
}
}
result
}
/// Builds the default six-feature scaffold used when an initializer request
/// carries no feature list; every record starts with `passes = false` (see
/// the `feature` helper).
pub fn starter_feature_list(&self, objective: &str) -> Vec<FeatureRecord> {
vec![
feature(
"initializer.artifacts",
"functional",
format!("Initializer artifacts exist for objective: {objective}"),
[
"Create init script metadata",
"Create session manifest",
"Create starter feature list",
],
),
feature(
"harness.baseline",
"functional",
"Baseline harness checks can run before coding iterations",
[
"Run startup script",
"Verify workspace status is readable",
"Record baseline in progress log",
],
),
feature(
"chat.turn",
"functional",
"Chat turn execution path is available",
[
"Create a chat session",
"Run one non-streaming turn",
"Persist transcript messages",
],
),
feature(
"chat.streaming",
"functional",
"Streaming turn execution emits expected events",
[
"Run one streaming turn",
"Observe text/tool events",
"Observe terminal turn completion",
],
),
feature(
"tool.loop",
"functional",
"Tool loop executes and feeds results back into model",
[
"Register at least one tool",
"Execute tool call during turn",
"Confirm follow-up completion",
],
),
feature(
"quality.regression",
"quality",
"Regression test pass status is tracked",
[
"Run crate-level tests",
"Capture failures in progress log",
"Only mark feature pass after verification",
],
),
]
}
/// Core coding loop: health check, completion-gate short-circuit, feature
/// selection, then up to `max_turns_per_run` chat turns with a shared
/// `retry_budget` spent on retryable chat errors and validation failures.
async fn run_coding_iteration_inner(
&self,
chat: &ChatService,
request: &CodingRunRequest,
) -> Result<CodingRunResult, HarnessError> {
let bootstrap = self
.memory
.load_bootstrap_state(&request.session.id)
.await?;
let manifest = bootstrap.manifest.ok_or_else(|| {
HarnessError::not_ready("session is not initialized; run initializer first")
})?;
let init_script = manifest
.init_script
.as_deref()
.unwrap_or(Self::DEFAULT_INIT_SCRIPT);
// Health-check failures abort only when the fail-fast policy says so;
// otherwise they are silently tolerated.
if let Err(error) = self
.health_checker
.run(&request.session.id, init_script)
.await
{
if self.run_policy.fail_fast.on_health_check_error {
return Err(error);
}
}
// Completion gate: nothing to do once every feature passes.
if all_required_features_passed(&bootstrap.feature_list) {
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: None,
validated: true,
no_pending_features: true,
used_stream: request.stream,
assistant_message: None,
});
}
let feature = self.feature_selector.select(&bootstrap.feature_list);
let Some(feature) = feature else {
return Err(HarnessError::validation(
"feature selector returned no work before required features reached passes=true",
));
};
let mut turns_used = 0usize;
let mut retries_remaining = self.run_policy.retry_budget;
while turns_used < self.run_policy.max_turns_per_run {
turns_used += 1;
// Prompt is rebuilt each turn; the override wins when present.
let prompt = request
.prompt_override
.clone()
.unwrap_or_else(|| build_feature_prompt(&feature, &manifest.current_objective));
let turn_request = if request.stream {
ChatTurnRequest::builder(request.session.clone(), prompt)
.enable_streaming()
.build()
} else {
ChatTurnRequest::builder(request.session.clone(), prompt).build()
};
let turn_result = match self.execute_turn(chat, turn_request).await {
Ok(result) => result,
Err(error) => {
// Chat errors surface immediately under fail-fast or when
// budgets (retries or turns) are exhausted.
if self.run_policy.fail_fast.on_chat_error
|| retries_remaining == 0
|| turns_used >= self.run_policy.max_turns_per_run
{
return Err(error);
}
retries_remaining -= 1;
continue;
}
};
let validated = self.validator.validate(&feature, &turn_result).await?;
if validated {
// Persist the pass flag, then re-read to evaluate the gate.
self.memory
.update_feature_pass(&request.session.id, &feature.id, true)
.await?;
let all_features_passing = self
.session_all_required_features_passed(&request.session.id)
.await?;
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id.clone()),
validated: true,
no_pending_features: all_features_passing,
used_stream: request.stream,
assistant_message: Some(turn_result.assistant_message),
});
}
// Validation failure: a non-error result, returned (not raised)
// under fail-fast or exhausted budgets.
if self.run_policy.fail_fast.on_validation_failure
|| retries_remaining == 0
|| turns_used >= self.run_policy.max_turns_per_run
{
return Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id.clone()),
validated: false,
no_pending_features: false,
used_stream: request.stream,
assistant_message: Some(turn_result.assistant_message),
});
}
retries_remaining -= 1;
}
// Loop exit without a verdict: all turns consumed by retries.
Ok(CodingRunResult {
session_id: request.session.id.clone(),
selected_feature_id: Some(feature.id),
validated: false,
no_pending_features: false,
used_stream: request.stream,
assistant_message: None,
})
}
/// Executes one chat turn, bridging the streaming and non-streaming APIs
/// into a single `ChatTurnResult`.
///
/// # Errors
/// `Chat` when a stream event errors or the stream ends without a
/// `TurnComplete`; converted chat errors on the non-streaming path.
async fn execute_turn(
&self,
chat: &ChatService,
turn_request: ChatTurnRequest,
) -> Result<ChatTurnResult, HarnessError> {
if turn_request.stream {
let mut stream = chat.stream_turn(turn_request).await?;
let mut final_result = None;
// Drain the stream, keeping only the terminal TurnComplete payload;
// intermediate events are discarded here.
while let Some(item) = stream.next().await {
match item {
Ok(ChatEvent::TurnComplete(turn_result)) => final_result = Some(turn_result),
Ok(_) => {}
Err(err) => return Err(HarnessError::from(err)),
}
}
final_result
.ok_or_else(|| HarnessError::chat("stream ended without TurnComplete event"))
} else {
chat.run_turn(turn_request)
.await
.map_err(HarnessError::from)
}
}
/// Reloads the session's bootstrap state and reports whether every feature
/// in its (non-empty) feature list has `passes = true`.
async fn session_all_required_features_passed(
    &self,
    session_id: &SessionId,
) -> Result<bool, HarnessError> {
    let state = self.memory.load_bootstrap_state(session_id).await?;
    let all_passed = all_required_features_passed(&state.feature_list);
    Ok(all_passed)
}
/// Writes the terminal run checkpoint (with completion time and status) and
/// appends a matching progress-log entry carrying the same note.
async fn record_final_handoff(
&self,
request: &CodingRunRequest,
started_at: SystemTime,
status: RunStatus,
note: String,
) -> Result<(), HarnessError> {
self.memory
.record_run_checkpoint(
&request.session.id,
RunCheckpoint {
run_id: request.run_id.clone(),
started_at,
completed_at: Some(SystemTime::now()),
status,
// Same note lands in both checkpoint and progress log.
note: Some(note.clone()),
},
)
.await?;
self.memory
.append_progress_entry(
&request.session.id,
ProgressEntry::new(request.run_id.clone(), note),
)
.await?;
Ok(())
}
}
/// Builds a `FeatureRecord` from owned-or-convertible parts; new records
/// always start not passing.
fn feature(
    id: impl Into<String>,
    category: impl Into<String>,
    description: impl Into<String>,
    steps: impl IntoIterator<Item = impl Into<String>>,
) -> FeatureRecord {
    let steps: Vec<String> = steps.into_iter().map(Into::into).collect();
    FeatureRecord {
        id: id.into(),
        category: category.into(),
        description: description.into(),
        steps,
        passes: false,
    }
}
/// Validates an initializer feature list: it must be non-empty, every id
/// must be non-blank and unique, every feature needs a description and at
/// least one validation step, and no feature may start as already passing.
fn validate_feature_list(feature_list: &[FeatureRecord]) -> Result<(), HarnessError> {
    if feature_list.is_empty() {
        return Err(HarnessError::invalid_request(
            "feature_list must contain at least one feature",
        ));
    }
    // Track ids by reference; no need to clone for the uniqueness check.
    let mut seen: HashSet<&str> = HashSet::new();
    for feature in feature_list {
        if feature.id.trim().is_empty() {
            return Err(HarnessError::invalid_request(
                "feature_list entries require non-empty id",
            ));
        }
        if !seen.insert(feature.id.as_str()) {
            return Err(HarnessError::invalid_request(format!(
                "feature_list contains duplicate id '{}': ids must be unique",
                feature.id
            )));
        }
        if feature.description.trim().is_empty() {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' must include a non-empty description",
                feature.id
            )));
        }
        if feature.steps.is_empty() {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' must include at least one validation step",
                feature.id
            )));
        }
        if feature.passes {
            return Err(HarnessError::invalid_request(format!(
                "feature '{}' cannot start with passes=true during initializer phase",
                feature.id
            )));
        }
    }
    Ok(())
}
/// Renders the per-turn prompt: the objective, a fixed instruction line, and
/// the feature's id/category/description plus its steps as a "- " list.
fn build_feature_prompt(feature: &FeatureRecord, objective: &str) -> String {
    let mut steps = String::new();
    for (index, step) in feature.steps.iter().enumerate() {
        if index > 0 {
            steps.push('\n');
        }
        steps.push_str("- ");
        steps.push_str(step);
    }
    format!(
        "Objective: {objective}\n\nWork on one feature incrementally and leave a clean handoff.\n\nFeature: {}\nCategory: {}\nDescription: {}\nValidation steps:\n{}",
        feature.id, feature.category, feature.description, steps
    )
}
/// True only when the list is non-empty and every record has `passes = true`;
/// an empty list means "not yet initialized", never "done".
fn all_required_features_passed(feature_list: &[FeatureRecord]) -> bool {
    match feature_list {
        [] => false,
        features => features.iter().all(|feature| feature.passes),
    }
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
use fchat::{ChatPolicy, InMemoryConversationStore};
use fmemory::InMemoryMemoryBackend;
use fprovider::{
Message, ModelProvider, ModelRequest, ModelResponse, OutputItem, ProviderFuture,
ProviderId, StopReason, StreamEvent, TokenUsage, ToolCall, VecEventStream,
};
use ftooling::{ToolError, ToolExecutionContext, ToolExecutionResult, ToolFuture, ToolRuntime};
use super::*;
/// Test double: provider that always returns a fixed assistant message
/// ("implemented" / "implemented-stream") and default usage.
#[derive(Debug)]
struct FakeProvider;
impl ModelProvider for FakeProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"implemented",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"implemented-stream",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
// One text delta followed by the terminal response event.
let stream = VecEventStream::new(vec![
Ok(StreamEvent::TextDelta("implemented-stream".to_string())),
Ok(StreamEvent::ResponseComplete(response)),
]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: provider that records every request it sees so tests can
/// inspect the prompt the harness produced.
#[derive(Debug, Default)]
struct RecordingProvider {
requests: Mutex<Vec<ModelRequest>>,
}
impl RecordingProvider {
// Panics if no request was recorded yet.
fn latest_request(&self) -> ModelRequest {
self.requests
.lock()
.expect("requests lock")
.last()
.cloned()
.expect("at least one request")
}
}
impl ModelProvider for RecordingProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
self.requests
.lock()
.expect("requests lock")
.push(request.clone());
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"recorded",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
self.requests
.lock()
.expect("requests lock")
.push(request.clone());
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"recorded-stream",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
let stream = VecEventStream::new(vec![Ok(StreamEvent::ResponseComplete(response))]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: provider that first emits a tool call, then (once tool
/// results are present in the request) a final "tool-complete" message —
/// exercising the tool loop.
#[derive(Debug)]
struct ToolLoopProvider;
impl ModelProvider for ToolLoopProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
// No tool results yet -> request a tool call; otherwise finish.
if request.tool_results.is_empty() {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::ToolCall(ToolCall {
id: "call_tool_1".to_string(),
name: "echo".to_string(),
arguments: "{}".to_string(),
})],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
} else {
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"tool-complete",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
}
})
}
fn stream<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async move {
let response = ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"tool-complete",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
};
let stream = VecEventStream::new(vec![Ok(StreamEvent::ResponseComplete(response))]);
Ok(Box::pin(stream) as fprovider::BoxedEventStream<'a>)
})
}
}
/// Test double: tool runtime that answers every call with output "ok".
#[derive(Debug, Default)]
struct EchoToolRuntime;
impl ToolRuntime for EchoToolRuntime {
fn execute<'a>(
&'a self,
tool_call: ToolCall,
_context: ToolExecutionContext,
) -> ToolFuture<'a, Result<ToolExecutionResult, ToolError>> {
Box::pin(async move {
Ok(ToolExecutionResult {
tool_call_id: tool_call.id,
output: "ok".to_string(),
})
})
}
}
/// Test double: selects the last (not first) pending feature.
#[derive(Debug, Default)]
struct LastPendingFeatureSelector;
impl FeatureSelector for LastPendingFeatureSelector {
fn select(&self, feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
feature_list
.iter()
.rev()
.find(|feature| !feature.passes)
.cloned()
}
}
/// Test double: health checker that counts invocations and always passes.
#[derive(Debug, Default)]
struct RecordingHealthChecker {
calls: Mutex<u32>,
}
impl HealthChecker for RecordingHealthChecker {
fn run<'a>(
&'a self,
_session_id: &'a SessionId,
_init_script: &'a str,
) -> BoxFuture<'a, Result<(), HarnessError>> {
Box::pin(async move {
*self.calls.lock().expect("calls lock") += 1;
Ok(())
})
}
}
/// Test double: validator that rejects every turn result.
struct AlwaysFailValidator;
impl OutcomeValidator for AlwaysFailValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
Box::pin(async { Ok(false) })
}
}
/// Test double: selector that never yields work.
#[derive(Debug, Default)]
struct NeverSelectFeature;
impl FeatureSelector for NeverSelectFeature {
fn select(&self, _feature_list: &[FeatureRecord]) -> Option<FeatureRecord> {
None
}
}
/// Test double: validator that fails until its Nth call, then passes —
/// used to exercise the retry budget.
#[derive(Debug, Default)]
struct EventuallyPassingValidator {
calls: Mutex<usize>,
pass_on_call: usize,
}
impl EventuallyPassingValidator {
fn new(pass_on_call: usize) -> Self {
Self {
calls: Mutex::new(0),
pass_on_call,
}
}
}
impl OutcomeValidator for EventuallyPassingValidator {
fn validate<'a>(
&'a self,
_feature: &'a FeatureRecord,
_result: &'a ChatTurnResult,
) -> BoxFuture<'a, Result<bool, HarnessError>> {
Box::pin(async move {
let mut calls = self.calls.lock().expect("calls lock");
*calls += 1;
Ok(*calls >= self.pass_on_call)
})
}
}
/// Test double: provider whose `complete` fails with a timeout for the
/// first N attempts, then succeeds; `stream` is intentionally unsupported.
#[derive(Debug, Default)]
struct FlakyCompletionProvider {
attempts: Mutex<usize>,
fail_for_attempts: usize,
}
impl FlakyCompletionProvider {
fn new(fail_for_attempts: usize) -> Self {
Self {
attempts: Mutex::new(0),
fail_for_attempts,
}
}
}
impl ModelProvider for FlakyCompletionProvider {
fn id(&self) -> ProviderId {
ProviderId::OpenAi
}
fn complete<'a>(
&'a self,
request: ModelRequest,
) -> ProviderFuture<'a, Result<ModelResponse, fprovider::ProviderError>> {
Box::pin(async move {
let mut attempts = self.attempts.lock().expect("attempts lock");
*attempts += 1;
if *attempts <= self.fail_for_attempts {
return Err(fprovider::ProviderError::timeout("transient failure"));
}
Ok(ModelResponse {
provider: ProviderId::OpenAi,
model: request.model,
output: vec![OutputItem::Message(Message::new(
fprovider::Role::Assistant,
"eventual-success",
))],
stop_reason: StopReason::EndTurn,
usage: TokenUsage::default(),
})
})
}
fn stream<'a>(
&'a self,
_request: ModelRequest,
) -> ProviderFuture<'a, Result<fprovider::BoxedEventStream<'a>, fprovider::ProviderError>>
{
Box::pin(async {
Err(fprovider::ProviderError::invalid_request(
"stream not used in flaky completion provider",
))
})
}
}
/// Assembles a `Harness` backed by the fake chat stack, optionally
/// attaching a health checker and/or an outcome validator.
fn build_harness(
    memory: Arc<dyn MemoryBackend>,
    health_checker: Option<Arc<dyn HealthChecker>>,
    validator: Option<Arc<dyn OutcomeValidator>>,
) -> Harness {
    let chat = Arc::new(
        ChatService::builder(Arc::new(FakeProvider))
            .store(Arc::new(InMemoryConversationStore::new()))
            .policy(ChatPolicy::default())
            .build(),
    );
    let mut harness = Harness::new(memory).with_chat(chat);
    if let Some(checker) = health_checker {
        harness = harness.with_health_checker(checker);
    }
    if let Some(validator) = validator {
        harness = harness.with_validator(validator);
    }
    harness
}
/// Runs the initializer with a single pending feature so coding-phase
/// tests start from an initialized session.
async fn initialize_for_tests(harness: &Harness, session_id: &str) {
    let feature = FeatureRecord {
        id: "feature-1".to_string(),
        category: "functional".to_string(),
        description: "build one feature".to_string(),
        steps: vec!["make it work".to_string()],
        passes: false,
    };
    let request = InitializerRequest::new(session_id, "run-init", "prepare coding run")
        .with_feature_list(vec![feature]);
    harness
        .run_initializer(request)
        .await
        .expect("initializer should succeed");
}
// First run of the initializer must create bootstrap state, persist the
// feature list, and store the init script inside the session manifest.
#[tokio::test]
async fn initializer_creates_bootstrap_state_on_first_run() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory.clone());
    let feature = FeatureRecord {
        id: "feature-1".to_string(),
        category: "functional".to_string(),
        description: "initializer creates artifacts".to_string(),
        steps: vec!["write feature list".to_string()],
        passes: false,
    };
    let request = InitializerRequest::new("session-1", "run-1", "Build initializer flow")
        .with_init_script("#!/usr/bin/env bash\necho start")
        .with_feature_list(vec![feature]);
    let outcome = harness
        .run_initializer(request)
        .await
        .expect("initializer should succeed");
    // Fresh session: created with exactly the one supplied feature.
    assert!(outcome.created);
    assert_eq!(outcome.feature_count, 1);
    assert_eq!(
        outcome.schema_version,
        SessionManifest::DEFAULT_SCHEMA_VERSION
    );
    // The persisted manifest must retain the init script.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-1"))
        .await
        .expect("bootstrap should load");
    let manifest = state.manifest.expect("manifest should exist");
    assert!(manifest.init_script.is_some());
}
// Re-initializing an already-initialized session must be a no-op: the
// original feature list survives and `created` is reported as false.
#[tokio::test]
async fn initializer_is_idempotent_when_session_already_initialized() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory);
    // Builds a one-feature request against the same session.
    let request_for = |run: &str, objective: &str, feature_id: &str, description: &str| {
        InitializerRequest::new("session-2", run, objective).with_feature_list(vec![
            FeatureRecord {
                id: feature_id.to_string(),
                category: "functional".to_string(),
                description: description.to_string(),
                steps: vec!["step".to_string()],
                passes: false,
            },
        ])
    };
    let first_result = harness
        .run_initializer(request_for("run-1", "Initialize", "feature-a", "first"))
        .await
        .expect("first init should succeed");
    assert!(first_result.created);
    assert_eq!(first_result.feature_count, 1);
    // The second run proposes a different feature, which must be ignored.
    let second_result = harness
        .run_initializer(request_for(
            "run-2",
            "Should not overwrite",
            "feature-b",
            "second",
        ))
        .await
        .expect("second init should succeed");
    assert!(!second_result.created);
    assert_eq!(second_result.feature_count, 1);
}
#[tokio::test]
async fn initializer_rejects_empty_objective() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let harness = Harness::new(memory);
let request = InitializerRequest::new("session-3", "run-1", " ");
let error = harness
.run_initializer(request)
.await
.expect_err("initializer should fail");
assert_eq!(error.kind, HarnessErrorKind::InvalidRequest);
}
#[tokio::test]
async fn initializer_generates_starter_feature_list_when_missing() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let harness = Harness::new(memory);
let request = InitializerRequest::new("session-4", "run-1", "Build coding harness");
let result = harness
.run_initializer(request)
.await
.expect("initializer should succeed");
assert!(result.created);
assert!(result.feature_count >= 4);
}
// The initializer must reject feature lists containing duplicate ids or
// features already marked as passing.
#[tokio::test]
async fn initializer_rejects_duplicate_or_passing_features() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::new(memory);
    // Two records share the id "dup".
    let duplicate_features = vec![
        FeatureRecord {
            id: "dup".to_string(),
            category: "functional".to_string(),
            description: "first".to_string(),
            steps: vec!["step".to_string()],
            passes: false,
        },
        FeatureRecord {
            id: "dup".to_string(),
            category: "functional".to_string(),
            description: "second".to_string(),
            steps: vec!["step".to_string()],
            passes: false,
        },
    ];
    // A throwaway harness is used so the duplicate-id failure cannot
    // interfere with the passing-feature scenario below.
    let duplicate_error = Harness::new(Arc::new(InMemoryMemoryBackend::new()))
        .run_initializer(
            InitializerRequest::new("session-5", "run-1", "Init")
                .with_feature_list(duplicate_features),
        )
        .await
        .expect_err("duplicate ids should fail");
    assert_eq!(duplicate_error.kind, HarnessErrorKind::InvalidRequest);
    // A feature submitted with `passes: true` must also be rejected.
    let passing_error = harness
        .run_initializer(
            InitializerRequest::new("session-6", "run-1", "Init").with_feature_list(vec![
                FeatureRecord {
                    id: "done".to_string(),
                    category: "functional".to_string(),
                    description: "already done".to_string(),
                    steps: vec!["step".to_string()],
                    passes: true,
                },
            ]),
        )
        .await
        .expect_err("pre-passing feature should fail");
    assert_eq!(passing_error.kind, HarnessErrorKind::InvalidRequest);
}
// Full happy path: the health check runs ("gets bearings"), the chat turn
// executes, validation passes, and the selected feature plus this run's
// progress/checkpoint records are persisted.
#[tokio::test]
async fn coding_iteration_gets_bearings_executes_and_marks_feature_passed() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let health = Arc::new(RecordingHealthChecker::default());
    let harness = build_harness(memory.clone(), Some(health.clone()), None);
    initialize_for_tests(&harness, "session-coding").await;
    let session = ChatSession::new("session-coding", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-code-1"))
        .await
        .expect("coding run should succeed");
    // The single seeded feature was validated, so none remain pending.
    assert!(result.no_pending_features);
    assert!(result.validated);
    assert_eq!(result.selected_feature_id.as_deref(), Some("feature-1"));
    // The health checker must have been consulted exactly once.
    let calls = health.calls.lock().expect("calls lock");
    assert_eq!(*calls, 1);
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-coding"))
        .await
        .expect("state should load");
    // Persisted side effects: feature marked passed, a progress entry and
    // a completed checkpoint recorded under this run id.
    assert!(state.feature_list[0].passes);
    assert!(
        state
            .recent_progress
            .iter()
            .any(|entry| entry.run_id == "run-code-1")
    );
    assert!(state.checkpoints.iter().any(
        |checkpoint| checkpoint.run_id == "run-code-1" && checkpoint.completed_at.is_some()
    ));
}
// The streaming code path must produce a validated run and still record
// the run's progress entry in the bootstrap state.
#[tokio::test]
async fn coding_iteration_stream_path_works_and_records_handoff() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None);
    initialize_for_tests(&harness, "session-stream").await;
    // Request the streaming variant of the coding iteration.
    let request = CodingRunRequest::new(
        ChatSession::new("session-stream", ProviderId::OpenAi, "gpt-4o-mini"),
        "run-stream-1",
    )
    .enable_streaming();
    let outcome = harness
        .run_coding_iteration(request)
        .await
        .expect("streaming coding run should succeed");
    assert!(outcome.used_stream);
    assert!(outcome.validated);
    // The streamed run must still leave a progress entry behind.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-stream"))
        .await
        .expect("state should load");
    let recorded = state
        .recent_progress
        .iter()
        .any(|entry| entry.run_id == "run-stream-1");
    assert!(recorded);
}
// When validation fails, the selected feature must stay pending.
#[tokio::test]
async fn coding_iteration_does_not_mark_feature_when_not_validated() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // A validator that always fails means no feature may be marked passed.
    let harness = build_harness(memory.clone(), None, Some(Arc::new(AlwaysFailValidator)));
    initialize_for_tests(&harness, "session-unvalidated").await;
    let chat_session =
        ChatSession::new("session-unvalidated", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-code-2"))
        .await
        .expect("coding run should complete");
    assert!(!outcome.validated);
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-unvalidated"))
        .await
        .expect("state should load");
    // The lone feature remains unpassed after the failed validation.
    assert!(!state.feature_list[0].passes);
}
// The builder must wire provider, tool runtime, memory, and chat together
// so a tool-using coding run completes end to end.
#[tokio::test]
async fn builder_wires_provider_tooling_memory_and_chat() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Wire a tool-looping provider with an echo tool runtime.
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(ToolLoopProvider))
        .tool_runtime(Arc::new(EchoToolRuntime))
        .build()
        .expect("builder should wire runtime");
    initialize_for_tests(&harness, "session-builder").await;
    let chat_session = ChatSession::new("session-builder", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-builder-1"))
        .await
        .expect("coding run should succeed");
    assert_eq!(outcome.assistant_message.as_deref(), Some("tool-complete"));
    // The tool loop is expected to persist three transcript messages.
    let transcript = memory
        .load_transcript_messages(&SessionId::from("session-builder"))
        .await
        .expect("transcript should load");
    assert_eq!(transcript.len(), 3);
}
// `Harness::run` should route an uninitialized session to the initializer
// phase first, then route the next run of the same session to coding.
#[tokio::test]
async fn runtime_run_selects_initializer_then_coding_phase() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = ChatSession::new("session-runtime", ProviderId::OpenAi, "gpt-4o-mini");
    let request =
        RuntimeRunRequest::new(session.clone(), "run-auto-1", "phase selector objective")
            .with_feature_list(vec![FeatureRecord {
                id: "feature-1".to_string(),
                category: "functional".to_string(),
                description: "phase selection".to_string(),
                steps: vec!["initialize then code".to_string()],
                passes: false,
            }]);
    let first = harness.run(request).await.expect("first phase should run");
    assert!(matches!(first, RuntimeRunOutcome::Initializer(_)));
    // The session is now initialized, so the second run must enter coding.
    let second = harness
        .run(RuntimeRunRequest::new(
            session,
            "run-auto-2",
            "phase selector objective",
        ))
        .await
        .expect("second phase should run");
    assert!(matches!(second, RuntimeRunOutcome::Coding(_)));
}
// A custom feature-selection strategy decides which pending feature a
// coding iteration works on; here the last pending feature must win.
#[tokio::test]
async fn coding_iteration_uses_feature_selection_strategy() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None)
        .with_feature_selector(Arc::new(LastPendingFeatureSelector));
    // Seed two pending features so the selector has a real choice.
    harness
        .run_initializer(
            InitializerRequest::new("session-selector", "run-init", "feature strategy")
                .with_feature_list(vec![
                    FeatureRecord {
                        id: "feature-a".to_string(),
                        category: "functional".to_string(),
                        description: "first pending".to_string(),
                        steps: vec!["do first".to_string()],
                        passes: false,
                    },
                    FeatureRecord {
                        id: "feature-b".to_string(),
                        category: "functional".to_string(),
                        description: "second pending".to_string(),
                        steps: vec!["do second".to_string()],
                        passes: false,
                    },
                ]),
        )
        .await
        .expect("initializer should succeed");
    let session = ChatSession::new("session-selector", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-selector-1"))
        .await
        .expect("coding run should succeed");
    // The last-pending strategy must have picked feature-b over feature-a.
    assert_eq!(result.selected_feature_id.as_deref(), Some("feature-b"));
}
#[tokio::test]
async fn builder_requires_provider_to_build_runtime() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let error = Harness::builder(memory)
.build()
.err()
.expect("provider should be required");
assert_eq!(error.kind, HarnessErrorKind::NotReady);
}
// `select_phase` must report Initializer for a fresh session and Coding
// once the same session has been initialized.
#[tokio::test]
async fn select_phase_tracks_session_initialization_state() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = SessionId::from("session-phase");
    // Before initialization: the initializer phase is selected.
    let phase_before = harness
        .select_phase(&session)
        .await
        .expect("phase should resolve");
    assert_eq!(phase_before, HarnessPhase::Initializer);
    harness
        .run_initializer(InitializerRequest::new(
            "session-phase",
            "run-init",
            "phase objective",
        ))
        .await
        .expect("initializer should succeed");
    // After initialization: the same session resolves to coding.
    let phase_after = harness
        .select_phase(&session)
        .await
        .expect("phase should resolve");
    assert_eq!(phase_after, HarnessPhase::Coding);
}
// A runtime run that lands in the initializer phase must apply every
// initializer-specific request field: active branch, init script,
// progress summary, and feature list.
#[tokio::test]
async fn runtime_run_initializer_applies_initializer_fields() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FakeProvider))
        .build()
        .expect("builder should succeed");
    let session = ChatSession::new("session-init-fields", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run(
            RuntimeRunRequest::new(session, "run-init-fields", "objective")
                .with_active_branch("feature/custom")
                .with_init_script("#!/usr/bin/env bash\necho init")
                .with_progress_summary("custom summary")
                .with_feature_list(vec![FeatureRecord {
                    id: "feature-custom".to_string(),
                    category: "functional".to_string(),
                    description: "custom feature".to_string(),
                    steps: vec!["step".to_string()],
                    passes: false,
                }]),
        )
        .await
        .expect("runtime run should initialize");
    assert!(matches!(outcome, RuntimeRunOutcome::Initializer(_)));
    // The persisted manifest must reflect the branch and init script, and
    // the custom summary must appear in recent progress.
    let state = memory
        .load_bootstrap_state(&SessionId::from("session-init-fields"))
        .await
        .expect("state should load");
    let manifest = state.manifest.expect("manifest should exist");
    assert_eq!(manifest.active_branch, "feature/custom");
    assert_eq!(
        manifest.init_script.as_deref(),
        Some("#!/usr/bin/env bash\necho init")
    );
    assert!(
        state
            .recent_progress
            .iter()
            .any(|entry| entry.summary == "custom summary")
    );
}
// A coding-phase runtime run must forward both the prompt override and
// the streaming flag down to the model provider's request.
#[tokio::test]
async fn runtime_run_forwards_prompt_override_and_streaming() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // RecordingProvider captures the last ModelRequest for inspection.
    let provider = Arc::new(RecordingProvider::default());
    let harness = Harness::builder(memory.clone())
        .provider(provider.clone())
        .build()
        .expect("builder should succeed");
    harness
        .run_initializer(
            InitializerRequest::new("session-runtime-prompt", "run-init", "objective")
                .with_feature_list(vec![FeatureRecord {
                    id: "feature-1".to_string(),
                    category: "functional".to_string(),
                    description: "check prompt override".to_string(),
                    steps: vec!["override prompt".to_string()],
                    passes: false,
                }]),
        )
        .await
        .expect("initializer should succeed");
    let session = ChatSession::new("session-runtime-prompt", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run(
            RuntimeRunRequest::new(session, "run-code", "objective")
                .with_prompt_override("explicit prompt")
                .enable_streaming(),
        )
        .await
        .expect("runtime run should code");
    assert!(matches!(outcome, RuntimeRunOutcome::Coding(_)));
    // The provider must have seen the streaming flag, and the overridden
    // prompt must be the final user message of the request.
    let request = provider.latest_request();
    assert!(request.stream);
    let last_message = request.messages.last().expect("user message should exist");
    assert_eq!(last_message.role, fprovider::Role::User);
    assert_eq!(last_message.content, "explicit prompt");
}
#[tokio::test]
async fn run_policy_enforces_strict_incremental_feature_limit() {
let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
let error = Harness::builder(memory)
.provider(Arc::new(FakeProvider))
.run_policy(RunPolicy {
max_turns_per_run: 1,
max_features_per_run: 2,
retry_budget: 0,
fail_fast: FailFastPolicy::default(),
})
.build()
.err()
.expect("policy should reject non-incremental feature count");
assert_eq!(error.kind, HarnessErrorKind::InvalidRequest);
}
// With a retry budget and fail-fast disabled for validation failures, a
// validator that first passes on its second call must still yield a
// validated run.
#[tokio::test]
async fn coding_iteration_retries_validation_when_policy_allows() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Validator fails on call 1 and passes on call 2.
    let harness = build_harness(
        memory.clone(),
        None,
        Some(Arc::new(EventuallyPassingValidator::new(2))),
    )
    .with_run_policy(RunPolicy {
        max_turns_per_run: 3,
        max_features_per_run: 1,
        retry_budget: 2,
        fail_fast: FailFastPolicy {
            on_validation_failure: false,
            ..FailFastPolicy::default()
        },
    })
    .expect("run policy should be accepted");
    initialize_for_tests(&harness, "session-retry-validation").await;
    let session = ChatSession::new(
        "session-retry-validation",
        ProviderId::OpenAi,
        "gpt-4o-mini",
    );
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-retry-validation"))
        .await
        .expect("coding run should succeed after retry");
    assert!(result.validated);
}
// The turn cap must stop the run even when retries are still available.
#[tokio::test]
async fn coding_iteration_stops_when_turn_budget_is_exhausted() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // One turn only, but a generous retry budget: the turn cap wins.
    let policy = RunPolicy {
        max_turns_per_run: 1,
        max_features_per_run: 1,
        retry_budget: 3,
        fail_fast: FailFastPolicy {
            on_validation_failure: false,
            ..FailFastPolicy::default()
        },
    };
    let harness = build_harness(memory.clone(), None, Some(Arc::new(AlwaysFailValidator)))
        .with_run_policy(policy)
        .expect("run policy should be accepted");
    initialize_for_tests(&harness, "session-turn-budget").await;
    let chat_session =
        ChatSession::new("session-turn-budget", ProviderId::OpenAi, "gpt-4o-mini");
    let outcome = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-turn-budget"))
        .await
        .expect("coding run should complete with validation failure");
    // The budget is spent on a failing validator, so the run ends unvalidated.
    assert!(!outcome.validated);
}
// A transient provider failure must be retried when the run policy's
// retry budget covers it and chat-error fail-fast is disabled.
#[tokio::test]
async fn coding_iteration_retries_chat_errors_within_retry_budget() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // Provider fails its first completion, then succeeds.
    let harness = Harness::builder(memory.clone())
        .provider(Arc::new(FlakyCompletionProvider::new(1)))
        .validator(Arc::new(AcceptAllValidator))
        .run_policy(RunPolicy {
            max_turns_per_run: 3,
            max_features_per_run: 1,
            retry_budget: 1,
            fail_fast: FailFastPolicy {
                on_chat_error: false,
                ..FailFastPolicy::default()
            },
        })
        .build()
        .expect("builder should succeed");
    initialize_for_tests(&harness, "session-chat-retry").await;
    let session = ChatSession::new("session-chat-retry", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-chat-retry"))
        .await
        .expect("chat error should be retried successfully");
    assert!(result.validated);
}
// A selector that picks nothing while features are still pending must not
// make the harness declare the session complete.
#[tokio::test]
async fn harness_does_not_declare_done_when_selector_returns_none_early() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    // The selector declines every feature even though one is pending.
    let harness = build_harness(memory.clone(), None, None)
        .with_feature_selector(Arc::new(NeverSelectFeature));
    initialize_for_tests(&harness, "session-no-early-done").await;
    let chat_session =
        ChatSession::new("session-no-early-done", ProviderId::OpenAi, "gpt-4o-mini");
    let error = harness
        .run_coding_iteration(CodingRunRequest::new(chat_session, "run-no-early-done"))
        .await
        .expect_err("selector returning none should fail completion gate");
    // "Nothing selected" must surface as a validation error, not success.
    assert_eq!(error.kind, HarnessErrorKind::Validation);
}
// With two pending features and (at most) one completed per iteration,
// the run validates but must not report the session as fully complete.
#[tokio::test]
async fn completion_gate_requires_all_features_to_pass_true() {
    let memory: Arc<dyn MemoryBackend> = Arc::new(InMemoryMemoryBackend::new());
    let harness = build_harness(memory.clone(), None, None);
    // Seed two pending features so one always remains after the run.
    harness
        .run_initializer(
            InitializerRequest::new("session-completion-gate", "run-init", "completion gate")
                .with_feature_list(vec![
                    FeatureRecord {
                        id: "feature-1".to_string(),
                        category: "functional".to_string(),
                        description: "first required feature".to_string(),
                        steps: vec!["implement 1".to_string()],
                        passes: false,
                    },
                    FeatureRecord {
                        id: "feature-2".to_string(),
                        category: "functional".to_string(),
                        description: "second required feature".to_string(),
                        steps: vec!["implement 2".to_string()],
                        passes: false,
                    },
                ]),
        )
        .await
        .expect("initializer should succeed");
    let session =
        ChatSession::new("session-completion-gate", ProviderId::OpenAi, "gpt-4o-mini");
    let result = harness
        .run_coding_iteration(CodingRunRequest::new(session, "run-completion-gate"))
        .await
        .expect("coding run should succeed");
    // The iteration validated, but a feature is still pending.
    assert!(result.validated);
    assert!(!result.no_pending_features);
}
}