use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use crate::models::schedule::Schedule;
use serde::{Deserialize, Serialize};
const AUTOMATION_CONFIG_RELATIVE_PATH: &str = "docs/harness/automations.yml";
const FILE_BUDGETS_RELATIVE_PATH: &str = "docs/fitness/file_budgets.json";
const ISSUE_SCANNER_RELATIVE_PATH: &str = ".github/scripts/issue-scanner.py";
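/// Raw automation config surfaced to callers: where the file lives relative to the
/// repo root, its source text, and the declared schema version (if any).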
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct HarnessAutomationConfigFile {
pub relative_path: String,
pub source: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub schema: Option<String>,
}
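/// One automation definition from `automations.yml`, flattened for display together
/// with its runtime state (pending findings, matched schedule, timestamps).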
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct HarnessAutomationDefinitionSummary {
pub id: String,
pub name: String,
pub description: String,
pub source_type: String,
pub source_label: String,
pub target_type: String,
pub target_label: String,
pub runtime_status: String,
pub pending_count: usize,
pub config_path: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub runtime_binding: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cron_expr: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_run_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub last_run_at: Option<String>,
}
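/// A single actionable finding (long-file budget overrun or issue-cleanup suspect),
/// attributed to the automation definition that produced it.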
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct HarnessAutomationPendingSignal {
pub id: String,
pub automation_id: String,
pub automation_name: String,
pub signal_type: String,
pub title: String,
pub summary: String,
pub severity: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub relative_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub line_count: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub budget_limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub excess_lines: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub defer_until_cron: Option<String>,
}
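/// Runtime state of a schedule-backed automation, derived from the matching
/// `Schedule` record.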
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct HarnessAutomationRecentRun {
pub automation_id: String,
pub automation_name: String,
pub source_type: String,
pub runtime_binding: String,
pub status: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cron_expr: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub last_run_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_run_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub last_task_id: Option<String>,
}
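/// Full report returned by [`detect_repo_automations`]: parsed definitions, pending
/// signals, schedule-backed runs, and any warnings collected along the way.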
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct HarnessAutomationReport {
pub generated_at: String,
pub repo_root: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub config_file: Option<HarnessAutomationConfigFile>,
pub definitions: Vec<HarnessAutomationDefinitionSummary>,
pub pending_signals: Vec<HarnessAutomationPendingSignal>,
pub recent_runs: Vec<HarnessAutomationRecentRun>,
pub warnings: Vec<String>,
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct AutomationConfigFile {
schema: Option<String>,
#[serde(default)]
definitions: Vec<AutomationDefinitionConfig>,
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct AutomationDefinitionConfig {
id: Option<String>,
name: Option<String>,
description: Option<String>,
#[serde(default)]
source: AutomationSourceConfig,
#[serde(default)]
target: AutomationTargetConfig,
#[serde(default)]
runtime: AutomationRuntimeConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct AutomationSourceConfig {
#[serde(rename = "type")]
type_field: Option<String>,
finding_type: Option<String>,
cron: Option<String>,
timezone: Option<String>,
max_items: Option<usize>,
min_lines: Option<usize>,
defer_until_cron: Option<String>,
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct AutomationTargetConfig {
#[serde(rename = "type")]
type_field: Option<String>,
r#ref: Option<String>,
prompt: Option<String>,
agent_id: Option<String>,
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct AutomationRuntimeConfig {
schedule_id: Option<String>,
schedule_name: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
struct FileBudgetOverride {
path: Option<String>,
max_lines: Option<usize>,
reason: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
struct FileBudgetConfig {
default_max_lines: Option<usize>,
include_roots: Option<Vec<String>>,
extensions: Option<Vec<String>>,
extension_max_lines: Option<std::collections::HashMap<String, usize>>,
excluded_parts: Option<Vec<String>>,
overrides: Option<Vec<FileBudgetOverride>>,
}
#[derive(Debug, Clone)]
struct LongFileFinding {
relative_path: String,
line_count: usize,
budget_limit: usize,
excess_lines: usize,
severity: String,
}
#[derive(Debug, Clone, Deserialize)]
struct IssueScannerSuspect {
file_a: Option<String>,
file_b: Option<String>,
reason: Option<String>,
#[serde(rename = "type")]
type_field: Option<String>,
}
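/// Builds a [`HarnessAutomationReport`] for the repository at `repo_root`.
///
/// Definitions are read from `docs/harness/automations.yml`; `finding` sources are
/// expanded into pending signals (long-file budget overruns or issue-scanner
/// suspects) and `schedule` sources are matched against the supplied `schedules`.
/// A minimal config, mirroring the fixture used in the tests below, looks like:
///
/// ```yaml
/// schema: harness-automation-v1
/// definitions:
///   - id: long-file-window
///     name: Long-file window
///     source:
///       type: finding
///       findingType: long-file
///       maxItems: 1
///       deferUntilCron: "0 10 * * 1"
///     target:
///       type: workflow
///       ref: refactor-window
/// ```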
pub fn detect_repo_automations(
repo_root: &Path,
schedules: &[Schedule],
) -> Result<HarnessAutomationReport, String> {
let mut warnings = Vec::new();
let (config_file, definitions) = load_automation_config(repo_root, &mut warnings);
let finding_definitions = definitions
.iter()
.filter(|definition| definition.source.type_field.as_deref() == Some("finding"))
.collect::<Vec<_>>();
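    // Definitions without an explicit findingType default to the long-file scan.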
let long_file_findings = if finding_definitions.iter().any(|definition| {
normalize_string(definition.source.finding_type.as_deref())
.map(|value| value == "long-file")
.unwrap_or(true)
}) {
detect_long_file_findings(repo_root, &mut warnings)?
} else {
Vec::new()
};
let issue_scanner_suspects = if finding_definitions.iter().any(|definition| {
normalize_string(definition.source.finding_type.as_deref()).as_deref()
== Some("issue-suspect")
}) {
detect_issue_scanner_suspects(repo_root, &mut warnings)
} else {
Vec::new()
};
let mut definition_summaries = Vec::new();
let mut pending_signals = Vec::new();
let mut recent_runs = Vec::new();
for (index, definition) in definitions.iter().enumerate() {
let Some(id) = normalize_string(definition.id.as_deref()) else {
warnings.push(format!(
"Skipping automation definition at index {index}: missing id."
));
continue;
};
let source_type = match normalize_source_type(definition.source.type_field.as_deref()) {
Some(value) => value,
None => {
warnings.push(format!(
"Skipping automation \"{id}\": unsupported source type \"{}\".",
definition.source.type_field.clone().unwrap_or_default()
));
continue;
}
};
let target_type = match normalize_target_type(definition.target.type_field.as_deref()) {
Some(value) => value,
None => {
warnings.push(format!(
"Skipping automation \"{id}\": unsupported target type \"{}\".",
definition.target.type_field.clone().unwrap_or_default()
));
continue;
}
};
let name = normalize_string(definition.name.as_deref()).unwrap_or_else(|| id.clone());
let description = normalize_string(definition.description.as_deref()).unwrap_or_default();
let matched_schedule = if source_type == "schedule" {
match_runtime_schedule(definition, schedules)
} else {
None
};
let definition_pending = if source_type == "finding" {
build_pending_signals(
definition,
&id,
&name,
&long_file_findings,
&issue_scanner_suspects,
)
} else {
Vec::new()
};
let pending_count = definition_pending.len();
let runtime_status =
compute_definition_status(source_type.as_str(), pending_count, matched_schedule);
if let Some(schedule) = matched_schedule {
recent_runs.push(HarnessAutomationRecentRun {
automation_id: id.clone(),
automation_name: name.clone(),
source_type: "schedule".to_string(),
runtime_binding: schedule.name.clone(),
status: if schedule.enabled {
if schedule.next_run_at.is_some() {
"active".to_string()
} else {
"idle".to_string()
}
} else {
"paused".to_string()
},
cron_expr: Some(schedule.cron_expr.clone()),
last_run_at: schedule.last_run_at.map(|value| value.to_rfc3339()),
next_run_at: schedule.next_run_at.map(|value| value.to_rfc3339()),
last_task_id: schedule.last_task_id.clone(),
});
}
pending_signals.extend(definition_pending);
definition_summaries.push(HarnessAutomationDefinitionSummary {
id,
name,
description,
source_type: source_type.clone(),
source_label: summarize_source(&definition.source, source_type.as_str()),
target_type: target_type.clone(),
target_label: summarize_target(&definition.target, target_type.as_str()),
runtime_status,
pending_count,
config_path: AUTOMATION_CONFIG_RELATIVE_PATH.to_string(),
runtime_binding: resolve_runtime_binding(definition),
cron_expr: normalize_string(definition.source.cron.as_deref())
.or_else(|| matched_schedule.map(|schedule| schedule.cron_expr.clone())),
next_run_at: matched_schedule
.and_then(|schedule| schedule.next_run_at.map(|v| v.to_rfc3339())),
last_run_at: matched_schedule
.and_then(|schedule| schedule.last_run_at.map(|v| v.to_rfc3339())),
});
}
recent_runs.sort_by(|left, right| {
let right_stamp = right
.last_run_at
.as_deref()
.or(right.next_run_at.as_deref())
.unwrap_or("");
let left_stamp = left
.last_run_at
.as_deref()
.or(left.next_run_at.as_deref())
.unwrap_or("");
right_stamp.cmp(left_stamp)
});
Ok(HarnessAutomationReport {
generated_at: chrono::Utc::now().to_rfc3339(),
repo_root: repo_root.display().to_string(),
config_file,
definitions: definition_summaries,
pending_signals,
recent_runs,
warnings,
})
}
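/// Reads and parses `docs/harness/automations.yml`. A missing, unreadable, or
/// unparsable file is reported as a warning and yields an empty definition list
/// rather than an error.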
fn load_automation_config(
repo_root: &Path,
warnings: &mut Vec<String>,
) -> (
Option<HarnessAutomationConfigFile>,
Vec<AutomationDefinitionConfig>,
) {
let absolute_path = repo_root.join(AUTOMATION_CONFIG_RELATIVE_PATH);
if !absolute_path.exists() {
warnings.push(format!(
"No \"{AUTOMATION_CONFIG_RELATIVE_PATH}\" file found for this repository."
));
return (None, Vec::new());
}
let Ok(source) = fs::read_to_string(&absolute_path) else {
warnings.push(format!(
"Failed to load {AUTOMATION_CONFIG_RELATIVE_PATH}: unable to read file."
));
return (None, Vec::new());
};
match serde_yaml::from_str::<AutomationConfigFile>(&source) {
Ok(parsed) => (
Some(HarnessAutomationConfigFile {
relative_path: AUTOMATION_CONFIG_RELATIVE_PATH.to_string(),
source,
schema: parsed.schema,
}),
parsed.definitions,
),
Err(error) => {
warnings.push(format!(
"Failed to load {AUTOMATION_CONFIG_RELATIVE_PATH}: {error}"
));
(None, Vec::new())
}
}
}
fn normalize_source_type(value: Option<&str>) -> Option<String> {
match normalize_string(value) {
Some(value)
if matches!(
value.as_str(),
"finding" | "schedule" | "review-signal" | "external-event"
) =>
{
Some(value)
}
_ => None,
}
}
fn normalize_target_type(value: Option<&str>) -> Option<String> {
match normalize_string(value) {
Some(value)
if matches!(
value.as_str(),
"specialist" | "workflow" | "background-task"
) =>
{
Some(value)
}
_ => None,
}
}
fn normalize_string(value: Option<&str>) -> Option<String> {
value
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToOwned::to_owned)
}
fn summarize_source(source: &AutomationSourceConfig, source_type: &str) -> String {
if source_type == "schedule" {
let cron =
normalize_string(source.cron.as_deref()).unwrap_or_else(|| "No cron".to_string());
return match normalize_string(source.timezone.as_deref()) {
Some(timezone) => format!("{cron} · {timezone}"),
None => cron,
};
}
if source_type == "finding" {
let finding_type = normalize_string(source.finding_type.as_deref())
.unwrap_or_else(|| "generic".to_string());
if finding_type == "issue-suspect" {
return match normalize_string(source.defer_until_cron.as_deref()) {
Some(window) => format!("issue-suspect · docs/issues scan · defer {window}"),
None => "issue-suspect · docs/issues scan".to_string(),
};
}
let line_part = source
.min_lines
.map(|value| format!(">= {value} lines"))
.unwrap_or_else(|| "budget overrun".to_string());
return match normalize_string(source.defer_until_cron.as_deref()) {
Some(window) => format!("{finding_type} · {line_part} · defer {window}"),
None => format!("{finding_type} · {line_part}"),
};
}
normalize_string(source.type_field.as_deref()).unwrap_or_else(|| source_type.to_string())
}
fn summarize_target(target: &AutomationTargetConfig, target_type: &str) -> String {
let suffix = normalize_string(target.r#ref.as_deref())
.or_else(|| normalize_string(target.agent_id.as_deref()))
.or_else(|| {
            normalize_string(target.prompt.as_deref()).map(|prompt| {
                // Clip on a char boundary so multi-byte prompts cannot panic.
                prompt.chars().take(72).collect::<String>()
            })
})
.unwrap_or_else(|| "Unbound".to_string());
match target_type {
"specialist" => format!("Specialist · {suffix}"),
"workflow" => format!("Workflow · {suffix}"),
"background-task" => format!("Background task · {suffix}"),
_ => suffix,
}
}
fn resolve_runtime_binding(definition: &AutomationDefinitionConfig) -> Option<String> {
normalize_string(definition.runtime.schedule_id.as_deref())
.or_else(|| normalize_string(definition.runtime.schedule_name.as_deref()))
.or_else(|| normalize_string(definition.name.as_deref()))
.or_else(|| normalize_string(definition.id.as_deref()))
}
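/// Finds the runtime schedule bound to a definition via `scheduleId` or `scheduleName`.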
fn match_runtime_schedule<'a>(
definition: &AutomationDefinitionConfig,
schedules: &'a [Schedule],
) -> Option<&'a Schedule> {
let schedule_id = normalize_string(definition.runtime.schedule_id.as_deref());
let schedule_name = normalize_string(definition.runtime.schedule_name.as_deref());
schedules.iter().find(|schedule| {
schedule_id
.as_ref()
.map(|candidate| candidate == &schedule.id)
.unwrap_or(false)
|| schedule_name
.as_ref()
.map(|candidate| candidate == &schedule.name)
.unwrap_or(false)
})
}
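/// Expands a `finding` definition into pending signals, honoring `maxItems`,
/// `minLines`, and `deferUntilCron` from the source config.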
fn build_pending_signals(
definition: &AutomationDefinitionConfig,
automation_id: &str,
automation_name: &str,
findings: &[LongFileFinding],
issue_scanner_suspects: &[IssueScannerSuspect],
) -> Vec<HarnessAutomationPendingSignal> {
if matches!(
normalize_string(definition.source.finding_type.as_deref()).as_deref(),
Some("issue-suspect")
) {
let max_items = definition
.source
.max_items
.unwrap_or(issue_scanner_suspects.len());
let defer_until_cron = normalize_string(definition.source.defer_until_cron.as_deref());
return issue_scanner_suspects
.iter()
.take(max_items)
.enumerate()
.map(|(index, suspect)| {
let primary_file = normalize_string(suspect.file_a.as_deref())
.unwrap_or_else(|| format!("suspect-{}.md", index + 1));
let secondary_file = normalize_string(suspect.file_b.as_deref());
let reason = normalize_string(suspect.reason.as_deref()).unwrap_or_else(|| {
"Issue scanner flagged this item for cleanup review.".to_string()
});
let signal_type = normalize_string(suspect.type_field.as_deref())
.unwrap_or_else(|| "issue-suspect".to_string());
HarnessAutomationPendingSignal {
id: format!("{automation_id}:{primary_file}:{index}"),
automation_id: automation_id.to_string(),
automation_name: automation_name.to_string(),
signal_type: signal_type.clone(),
title: primary_file.clone(),
summary: match secondary_file {
Some(file_b) => format!("{reason} Compare with {file_b}."),
None => reason,
},
severity: classify_issue_suspect_severity(signal_type.as_str()).to_string(),
relative_path: Some(format!("docs/issues/{primary_file}")),
line_count: None,
budget_limit: None,
excess_lines: None,
defer_until_cron: defer_until_cron.clone(),
}
})
.collect();
}
if matches!(
normalize_string(definition.source.finding_type.as_deref()).as_deref(),
Some(value) if value != "long-file"
) {
return Vec::new();
}
let min_lines = definition.source.min_lines.unwrap_or(0);
let max_items = definition.source.max_items.unwrap_or(findings.len());
let defer_until_cron = normalize_string(definition.source.defer_until_cron.as_deref());
findings
.iter()
.filter(|finding| finding.line_count >= min_lines)
.take(max_items)
.map(|finding| HarnessAutomationPendingSignal {
id: format!("{automation_id}:{}", finding.relative_path),
automation_id: automation_id.to_string(),
automation_name: automation_name.to_string(),
signal_type: "long-file".to_string(),
title: finding
.relative_path
.rsplit('/')
.next()
.unwrap_or(&finding.relative_path)
.to_string(),
summary: format!(
"{} lines vs budget {} (+{})",
finding.line_count, finding.budget_limit, finding.excess_lines
),
severity: finding.severity.clone(),
relative_path: Some(finding.relative_path.clone()),
line_count: Some(finding.line_count),
budget_limit: Some(finding.budget_limit),
excess_lines: Some(finding.excess_lines),
defer_until_cron: defer_until_cron.clone(),
})
.collect()
}
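/// Runs `.github/scripts/issue-scanner.py --suspects-only` and parses its stdout as
/// a JSON array of suspects (`file_a`, `file_b`, `reason`, `type`), matching the
/// shape exercised in the tests below. Any failure downgrades to a warning and an
/// empty list.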
fn detect_issue_scanner_suspects(
repo_root: &Path,
warnings: &mut Vec<String>,
) -> Vec<IssueScannerSuspect> {
let absolute_path = repo_root.join(ISSUE_SCANNER_RELATIVE_PATH);
if !absolute_path.exists() {
warnings.push(format!(
"Missing {ISSUE_SCANNER_RELATIVE_PATH}; issue cleanup suspects are unavailable."
));
return Vec::new();
}
let python_bin = if cfg!(windows) { "python" } else { "python3" };
let output = match Command::new(python_bin)
.arg(&absolute_path)
.arg("--suspects-only")
.current_dir(repo_root)
.output()
{
Ok(output) => output,
Err(error) => {
warnings.push(format!(
"Failed to run {ISSUE_SCANNER_RELATIVE_PATH} --suspects-only: {error}"
));
return Vec::new();
}
};
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
warnings.push(format!(
"Failed to run {ISSUE_SCANNER_RELATIVE_PATH} --suspects-only: {}",
if stderr.is_empty() {
format!("exit status {}", output.status)
} else {
stderr
}
));
return Vec::new();
}
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
if stdout.is_empty() || stdout == "No suspects found." {
return Vec::new();
}
match serde_json::from_str::<Vec<IssueScannerSuspect>>(&stdout) {
Ok(suspects) => suspects,
Err(error) => {
warnings.push(format!(
"Unexpected output from {ISSUE_SCANNER_RELATIVE_PATH} --suspects-only: {error}"
));
Vec::new()
}
}
}
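/// Status label for a definition: finding sources report "pending"/"clear" based on
/// open signals; schedule sources reflect the matched schedule's state.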
fn compute_definition_status(
source_type: &str,
pending_count: usize,
schedule: Option<&Schedule>,
) -> String {
if source_type == "finding" {
return if pending_count > 0 {
"pending".to_string()
} else {
"clear".to_string()
};
}
match schedule {
None => "definition-only".to_string(),
Some(schedule) if !schedule.enabled => "paused".to_string(),
Some(schedule) if schedule.next_run_at.is_some() => "active".to_string(),
Some(_) => "idle".to_string(),
}
}
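/// Walks the configured include roots and reports files whose line counts exceed
/// their budget, sorted by how far over budget they are.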
fn detect_long_file_findings(
repo_root: &Path,
warnings: &mut Vec<String>,
) -> Result<Vec<LongFileFinding>, String> {
let budgets = load_file_budgets(repo_root, warnings);
let include_roots = budgets
.include_roots
.clone()
.unwrap_or_else(|| vec!["src".to_string(), "apps".to_string(), "crates".to_string()]);
let extensions = budgets
.extensions
.clone()
.unwrap_or_else(|| vec![".ts".to_string(), ".tsx".to_string(), ".rs".to_string()]);
let excluded_parts = budgets.excluded_parts.clone().unwrap_or_else(|| {
vec![
"/node_modules/".to_string(),
"/target/".to_string(),
"/.next/".to_string(),
"/_next/".to_string(),
"/bundled/".to_string(),
]
});
let extension_max_lines = budgets.extension_max_lines.clone().unwrap_or_default();
let default_max_lines = budgets.default_max_lines.unwrap_or(1000);
let overrides = budgets.overrides.clone().unwrap_or_default();
let mut candidates = Vec::new();
for root in include_roots {
let absolute_root = repo_root.join(&root);
if absolute_root.is_dir() {
walk_files(&absolute_root, &mut candidates);
}
}
let mut findings = Vec::new();
for absolute_path in candidates {
let relative_path = absolute_path
.strip_prefix(repo_root)
.unwrap_or(&absolute_path)
.to_string_lossy()
.replace('\\', "/");
let extension = Path::new(&relative_path)
.extension()
.map(|value| format!(".{}", value.to_string_lossy().to_lowercase()))
.unwrap_or_default();
if !extensions.iter().any(|candidate| candidate == &extension) {
continue;
}
        if excluded_parts
            .iter()
            .any(|excluded| relative_path.contains(excluded))
        {
            continue;
        }
let Ok(source) = fs::read_to_string(&absolute_path) else {
continue;
};
let line_count = count_lines(&source);
let (budget_limit, _reason) = resolve_budget(
&relative_path,
&extension,
default_max_lines,
&extension_max_lines,
&overrides,
);
if line_count <= budget_limit {
continue;
}
let excess_lines = line_count.saturating_sub(budget_limit);
findings.push(LongFileFinding {
relative_path,
line_count,
budget_limit,
excess_lines,
severity: classify_severity(excess_lines).to_string(),
});
}
findings.sort_by(|left, right| {
right
.excess_lines
.cmp(&left.excess_lines)
.then_with(|| left.relative_path.cmp(&right.relative_path))
});
Ok(findings)
}
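/// Loads `docs/fitness/file_budgets.json`, falling back to the built-in defaults
/// when the file is missing or cannot be parsed. The expected shape, mirroring the
/// test fixture below, is:
///
/// ```json
/// {
///   "default_max_lines": 20,
///   "include_roots": ["src"],
///   "extensions": [".ts"],
///   "extension_max_lines": { ".ts": 20 },
///   "excluded_parts": [],
///   "overrides": []
/// }
/// ```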
fn load_file_budgets(repo_root: &Path, warnings: &mut Vec<String>) -> FileBudgetConfig {
    let absolute_path = repo_root.join(FILE_BUDGETS_RELATIVE_PATH);
    if !absolute_path.exists() {
        warnings.push(format!(
            "Missing {FILE_BUDGETS_RELATIVE_PATH}; using default long-file budget thresholds."
        ));
        return default_file_budgets();
    }
    match fs::read_to_string(&absolute_path)
        .ok()
        .and_then(|source| serde_json::from_str::<FileBudgetConfig>(&source).ok())
    {
        Some(config) => config,
        None => {
            warnings.push(format!(
                "Failed to parse {FILE_BUDGETS_RELATIVE_PATH}; using default long-file budget thresholds."
            ));
            default_file_budgets()
        }
    }
}
/// Built-in budget thresholds used when the config file is missing or unparsable.
fn default_file_budgets() -> FileBudgetConfig {
    FileBudgetConfig {
        default_max_lines: Some(1000),
        include_roots: Some(vec![
            "src".to_string(),
            "apps".to_string(),
            "crates".to_string(),
        ]),
        extensions: Some(vec![
            ".ts".to_string(),
            ".tsx".to_string(),
            ".rs".to_string(),
        ]),
        extension_max_lines: Some(
            [
                (".rs".to_string(), 800),
                (".ts".to_string(), 1000),
                (".tsx".to_string(), 1000),
            ]
            .into_iter()
            .collect(),
        ),
        excluded_parts: Some(vec![
            "/node_modules/".to_string(),
            "/target/".to_string(),
            "/.next/".to_string(),
            "/_next/".to_string(),
            "/bundled/".to_string(),
        ]),
        overrides: Some(Vec::new()),
    }
}
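/// Recursively collects every regular file under `dir`; unreadable directories are skipped.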
fn walk_files(dir: &Path, collected: &mut Vec<PathBuf>) {
let Ok(entries) = fs::read_dir(dir) else {
return;
};
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
walk_files(&path, collected);
} else if path.is_file() {
collected.push(path);
}
}
}
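/// Resolves the line budget for a file: an exact-path override wins, then the
/// per-extension limit, then the default.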
fn resolve_budget(
relative_path: &str,
extension: &str,
default_max_lines: usize,
extension_max_lines: &std::collections::HashMap<String, usize>,
overrides: &[FileBudgetOverride],
) -> (usize, Option<String>) {
if let Some(override_entry) = overrides.iter().find(|candidate| {
candidate
.path
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.map(|value| value == relative_path)
.unwrap_or(false)
}) {
if let Some(max_lines) = override_entry.max_lines {
return (max_lines, override_entry.reason.clone());
}
}
(
extension_max_lines
.get(extension)
.copied()
.unwrap_or(default_max_lines),
None,
)
}
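/// Maps how far a file is over budget to a severity bucket: 250 or more excess
/// lines is high, 100 or more is medium, anything else is low.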
fn classify_severity(excess_lines: usize) -> &'static str {
if excess_lines >= 250 {
"high"
} else if excess_lines >= 100 {
"medium"
} else {
"low"
}
}
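/// Severity for issue-scanner suspects, keyed on the suspect `type` reported by the script.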
fn classify_issue_suspect_severity(signal_type: &str) -> &'static str {
match signal_type {
"stale" => "high",
"duplicate" => "medium",
"open_check" => "low",
_ => "medium",
}
}
fn count_lines(source: &str) -> usize {
if source.is_empty() {
0
} else {
source.lines().count()
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use tempfile::tempdir;
#[test]
fn detects_repo_automations_and_runtime_schedule_state() {
let temp_dir = tempdir().expect("temp dir");
let repo_root = temp_dir.path();
fs::create_dir_all(repo_root.join("docs/harness")).expect("docs/harness");
fs::create_dir_all(repo_root.join("docs/fitness")).expect("docs/fitness");
fs::create_dir_all(repo_root.join("src")).expect("src");
fs::write(
repo_root.join("docs/harness/automations.yml"),
[
"schema: harness-automation-v1",
"definitions:",
" - id: long-file-window",
" name: Long-file window",
" source:",
" type: finding",
" findingType: long-file",
" maxItems: 1",
" deferUntilCron: \"0 10 * * 1\"",
" target:",
" type: workflow",
" ref: refactor-window",
" - id: weekly-harness-fluency",
" name: Weekly harness fluency",
" source:",
" type: schedule",
" cron: \"0 3 * * 1\"",
" timezone: UTC",
" target:",
" type: specialist",
" ref: harness-test",
" runtime:",
" scheduleName: Weekly harness fluency",
]
.join("\n"),
)
.expect("automations config");
fs::write(
repo_root.join("docs/fitness/file_budgets.json"),
r#"{
"default_max_lines": 20,
"include_roots": ["src"],
"extensions": [".ts"],
"extension_max_lines": { ".ts": 20 },
"excluded_parts": [],
"overrides": []
}"#,
)
.expect("file budgets");
fs::write(
repo_root.join("src/oversized.ts"),
vec!["export const x = 1;"; 35].join("\n"),
)
.expect("oversized file");
let schedule = Schedule {
id: "schedule-1".to_string(),
name: "Weekly harness fluency".to_string(),
cron_expr: "0 3 * * 1".to_string(),
task_prompt: "Run harness fluency".to_string(),
agent_id: "claude-code".to_string(),
workspace_id: "default".to_string(),
enabled: true,
last_run_at: Some(Utc::now()),
next_run_at: Some(Utc::now()),
last_task_id: Some("task-1".to_string()),
prompt_template: None,
created_at: Utc::now(),
updated_at: Utc::now(),
};
let report = detect_repo_automations(repo_root, &[schedule]).expect("report");
assert_eq!(report.definitions.len(), 2);
assert_eq!(report.pending_signals.len(), 1);
assert_eq!(report.pending_signals[0].automation_id, "long-file-window");
assert_eq!(report.recent_runs.len(), 1);
assert_eq!(
report.recent_runs[0].automation_id,
"weekly-harness-fluency"
);
assert_eq!(report.recent_runs[0].status, "active");
}
#[test]
fn surfaces_issue_gc_suspects_as_pending_signals() {
let temp_dir = tempdir().expect("temp dir");
let repo_root = temp_dir.path();
fs::create_dir_all(repo_root.join("docs/harness")).expect("docs/harness");
fs::create_dir_all(repo_root.join(".github/scripts")).expect(".github/scripts");
fs::write(
repo_root.join("docs/harness/automations.yml"),
[
"schema: harness-automation-v1",
"definitions:",
" - id: issue-gc-review",
" name: Issue cleanup review",
" source:",
" type: finding",
" findingType: issue-suspect",
" maxItems: 2",
" deferUntilCron: \"0 9 * * 1\"",
" target:",
" type: workflow",
" ref: issue-garbage-collector",
]
.join("\n"),
)
.expect("automations config");
fs::write(
repo_root.join(".github/scripts/issue-scanner.py"),
[
"import json",
"print(json.dumps([",
" {'file_a': '2026-04-01-old-bug.md', 'file_b': None, 'reason': 'Open for 35 days (>30), likely stale', 'type': 'stale'},",
" {'file_a': '2026-04-02-dup-a.md', 'file_b': '2026-04-02-dup-b.md', 'reason': \"Same area 'ui', keywords: {'layout', 'panel'}\", 'type': 'duplicate'}",
"]))",
]
.join("\n"),
)
.expect("scanner script");
let report = detect_repo_automations(repo_root, &[]).expect("report");
assert_eq!(report.definitions.len(), 1);
assert_eq!(report.definitions[0].runtime_status, "pending");
assert_eq!(report.definitions[0].pending_count, 2);
assert_eq!(
report.definitions[0].source_label,
"issue-suspect · docs/issues scan · defer 0 9 * * 1"
);
assert_eq!(report.pending_signals.len(), 2);
assert_eq!(report.pending_signals[0].signal_type, "stale");
assert_eq!(report.pending_signals[0].severity, "high");
assert_eq!(
report.pending_signals[0].relative_path.as_deref(),
Some("docs/issues/2026-04-01-old-bug.md")
);
assert_eq!(report.pending_signals[1].signal_type, "duplicate");
assert_eq!(report.pending_signals[1].severity, "medium");
}
}