use std::{
collections::BTreeSet,
fs,
path::{Path, PathBuf},
};
use agent_domain::{DependencyPolicy, DomainTypeError, RepoPath, TrustLevel};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use toml::Value;
/// Whether the scanned repository is a single crate or a multi-member
/// Cargo workspace (decided by `RepoModeler::scan` from the member count).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum WorkspaceKind {
    /// Zero or one crate was discovered.
    SingleCrate,
    /// More than one workspace member was discovered.
    MultiCrate,
}
/// Facts extracted from a single crate's `Cargo.toml` and source layout.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CrateFacts {
    // `[package].name`, or "unknown" when absent.
    pub name: String,
    // Workspace-relative path to the crate's `Cargo.toml`.
    pub manifest_path: RepoPath,
    // `[package].edition`, defaulting to "2024" when absent.
    pub edition: String,
    // Keys of `[dependencies]`; sorted/deduped by the BTreeSet.
    pub dependencies: BTreeSet<String>,
    // Existing conventional entry points (`src/lib.rs`, `src/main.rs`).
    pub source_files: Vec<RepoPath>,
}
/// Locations of toolchain-related configuration discovered in the repo,
/// each `None`/empty when the corresponding file is absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ToolchainFacts {
    // `rust-toolchain.toml` at the workspace root, if present.
    pub rust_toolchain_path: Option<RepoPath>,
    // `.cargo/config.toml` at the workspace root, if present.
    pub cargo_config_path: Option<RepoPath>,
    // Files found under `.github/workflows/`.
    pub ci_workflows: Vec<RepoPath>,
}
/// Async runtime inferred from the union of crate dependencies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AsyncModel {
    /// `tokio` is a dependency.
    Tokio,
    /// `async-std` is a dependency.
    AsyncStd,
    /// Dependencies exist but name no known async runtime.
    NoneKnown,
    /// No dependencies were found, so nothing can be inferred.
    Unknown,
}
/// Error-handling style inferred from the union of crate dependencies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ErrorStyle {
    /// `thiserror` is a dependency.
    ThisError,
    /// `anyhow` is a dependency.
    Anyhow,
    /// Dependencies exist but name no known error crate.
    Standard,
    /// No dependencies were found, so nothing can be inferred.
    Unknown,
}
/// Logging style inferred from the union of crate dependencies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LoggingStyle {
    /// `tracing` is a dependency.
    Tracing,
    /// `log` is a dependency.
    Log,
    /// Dependencies exist but name no known logging crate.
    NoneKnown,
    /// No dependencies were found, so nothing can be inferred.
    Unknown,
}
/// Testing style inferred from the union of crate dependencies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TestStyle {
    /// `trycmd` is a dependency.
    Trycmd,
    /// `assert_cmd` is a dependency.
    AssertCmd,
    /// Dependencies exist but name no known test-harness crate.
    Standard,
    /// No dependencies were found, so nothing can be inferred.
    Unknown,
}
/// Command-line argument style inferred from the union of crate dependencies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CliStyle {
    /// `clap` is a dependency.
    Clap,
    /// `pico-args` is a dependency.
    PicoArgs,
    /// Dependencies exist but name no known CLI crate.
    NoneKnown,
    /// No dependencies were found, so nothing can be inferred.
    Unknown,
}
/// A crate's public API surface, identified by its library entry point(s).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ApiBoundary {
    // Name of the crate exposing the boundary.
    pub crate_name: String,
    // Source files that define the public API (the crate's `src/lib.rs`).
    pub public_paths: Vec<RepoPath>,
}
/// A single established fact about the repository, tagged with the trust
/// level of its provenance.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RepoFact {
    // What the fact is about, e.g. "workspace_kind".
    pub subject: String,
    // The fact's value, rendered as text.
    pub detail: String,
    // Provenance trust, e.g. `TrustLevel::RepoCode` for scanned facts.
    pub trust_level: TrustLevel,
}
/// An unresolved question recorded when a fact could not be established
/// and had to be inferred.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct OpenQuestion {
    pub question: String,
}
/// The budgeted context assembled by [`ContextBuilder::build`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WorkingSet {
    // Files to read, in priority order, capped by `ContextBudget::max_files`.
    pub files: Vec<RepoPath>,
    // Crate names discovered in the repo model.
    pub symbols: Vec<String>,
    // Facts derived from the repo model.
    pub facts: Vec<RepoFact>,
    // Questions raised where inference replaced evidence.
    pub open_questions: Vec<OpenQuestion>,
}
/// Caps on how much context a working set may carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContextBudget {
    // Maximum number of files in `WorkingSet::files`.
    pub max_files: u16,
    // Maximum total lines of context.
    // NOTE(review): not enforced by `ContextBuilder::build` in this file —
    // presumably consumed elsewhere; confirm.
    pub max_lines: u32,
    // Maximum number of tool results to retain.
    // NOTE(review): also not enforced in this file; confirm the consumer.
    pub max_tool_results: u16,
}
impl Default for ContextBudget {
fn default() -> Self {
Self {
max_files: 16,
max_lines: 800,
max_tool_results: 12,
}
}
}
/// A durable snapshot of a working set's state, produced by
/// [`ContextBuilder::snapshot`] for later restoration or hand-off.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContextSnapshot {
    // Facts carried over verbatim from the working set.
    pub preserved_facts: Vec<RepoFact>,
    // The plan in effect at snapshot time.
    pub current_plan: String,
    // Failures still being worked at snapshot time.
    pub active_failures: Vec<String>,
}
/// Everything [`RepoModeler::scan`] learned about a repository.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RepoModel {
    pub workspace_kind: WorkspaceKind,
    pub crates: Vec<CrateFacts>,
    // Edition of the first crate, or "2024" when no crates were found.
    pub edition: String,
    pub toolchain: ToolchainFacts,
    // The following styles are inferred from the union of all crate
    // dependencies — see the `infer_*` helpers.
    pub async_model: AsyncModel,
    pub error_style: ErrorStyle,
    pub logging_style: LoggingStyle,
    pub test_style: TestStyle,
    pub cli_style: CliStyle,
    // Always `AllowApproved` as produced by `scan`.
    pub dependency_policy: DependencyPolicy,
    pub public_api_boundaries: Vec<ApiBoundary>,
    // Prioritized list of files an agent should read first.
    pub read_order: Vec<RepoPath>,
}
/// Stateless scanner that builds a [`RepoModel`] from a repository root.
#[derive(Debug, Default)]
pub struct RepoModeler;
impl RepoModeler {
pub fn scan(root: &Path) -> Result<RepoModel, ContextError> {
let root_manifest = root.join("Cargo.toml");
let mut read_order = Vec::new();
push_if_exists(&mut read_order, root, &root_manifest)?;
push_if_exists(&mut read_order, root, &root.join("rust-toolchain.toml"))?;
push_if_exists(
&mut read_order,
root,
&root.join(".cargo").join("config.toml"),
)?;
let workflow_dir = root.join(".github").join("workflows");
if workflow_dir.is_dir() {
let mut workflow_paths = fs::read_dir(&workflow_dir)?
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.map(|entry| entry.path())
.collect::<Vec<_>>();
workflow_paths.sort();
for path in workflow_paths {
push_if_exists(&mut read_order, root, &path)?;
}
}
push_if_exists(&mut read_order, root, &root.join("AGENTS.md"))?;
for directory in ["rules", "path-rules", "modes", "approvals"] {
push_directory_entries_if_exists(
&mut read_order,
root,
&root.join(".agent").join(directory),
)?;
}
for candidate in ["README.md", "README"] {
push_if_exists(&mut read_order, root, &root.join(candidate))?;
}
let root_value = parse_manifest(&root_manifest)?;
let member_manifest_paths = member_manifests(root, &root_value);
let mut crates = Vec::new();
for manifest in &member_manifest_paths {
push_if_exists(&mut read_order, root, manifest)?;
crates.push(scan_crate(root, manifest)?);
}
let all_dependencies = crates
.iter()
.flat_map(|facts| facts.dependencies.iter().cloned())
.collect::<BTreeSet<_>>();
let workspace_kind = if crates.len() > 1 {
WorkspaceKind::MultiCrate
} else {
WorkspaceKind::SingleCrate
};
let toolchain = ToolchainFacts {
rust_toolchain_path: relative_path(root, &root.join("rust-toolchain.toml"))?,
cargo_config_path: relative_path(root, &root.join(".cargo").join("config.toml"))?,
ci_workflows: read_order
.iter()
.filter(|path| path.as_str().starts_with(".github/workflows/"))
.cloned()
.collect(),
};
let public_api_boundaries = crates
.iter()
.filter(|facts| {
facts
.source_files
.iter()
.any(|path| path.as_str().ends_with("/src/lib.rs"))
})
.map(|facts| ApiBoundary {
crate_name: facts.name.clone(),
public_paths: facts
.source_files
.iter()
.filter(|path| path.as_str().ends_with("/src/lib.rs"))
.cloned()
.collect(),
})
.collect();
let edition = crates
.first()
.map(|facts| facts.edition.clone())
.unwrap_or_else(|| "2024".to_owned());
Ok(RepoModel {
workspace_kind,
crates,
edition,
toolchain,
async_model: infer_async_model(&all_dependencies),
error_style: infer_error_style(&all_dependencies),
logging_style: infer_logging_style(&all_dependencies),
test_style: infer_test_style(&all_dependencies),
cli_style: infer_cli_style(&all_dependencies),
dependency_policy: DependencyPolicy::AllowApproved,
public_api_boundaries,
read_order,
})
}
}
/// Stateless assembler of budgeted [`WorkingSet`]s and [`ContextSnapshot`]s.
#[derive(Debug, Default)]
pub struct ContextBuilder;
impl ContextBuilder {
    /// Assembles a [`WorkingSet`] from the repo model, honouring the
    /// file cap in `budget`.
    #[must_use]
    pub fn build(repo_model: &RepoModel, budget: ContextBudget) -> WorkingSet {
        // Seed with the curated read order, then append any crate source
        // files not already listed, preserving discovery order.
        let mut files = repo_model.read_order.clone();
        let crate_sources = repo_model
            .crates
            .iter()
            .flat_map(|facts| facts.source_files.iter());
        for source_file in crate_sources {
            if !files.contains(source_file) {
                files.push(source_file.clone());
            }
        }
        files.truncate(usize::from(budget.max_files));

        // Facts derived directly from scanned code carry RepoCode trust.
        let mut facts = Vec::with_capacity(3);
        facts.push(RepoFact {
            subject: "workspace_kind".to_owned(),
            detail: format!("{:?}", repo_model.workspace_kind),
            trust_level: TrustLevel::RepoCode,
        });
        facts.push(RepoFact {
            subject: "crate_count".to_owned(),
            detail: repo_model.crates.len().to_string(),
            trust_level: TrustLevel::RepoCode,
        });
        facts.push(RepoFact {
            subject: "cli_style".to_owned(),
            detail: format!("{:?}", repo_model.cli_style),
            trust_level: TrustLevel::RepoCode,
        });

        // Flag the inference explicitly when no library boundary was found.
        let mut open_questions = Vec::new();
        if repo_model.public_api_boundaries.is_empty() {
            open_questions.push(OpenQuestion {
                question: "No library boundary was inferred; public API impact is an inference."
                    .to_owned(),
            });
        }

        let symbols = repo_model
            .crates
            .iter()
            .map(|facts| facts.name.clone())
            .collect();
        WorkingSet {
            files,
            symbols,
            facts,
            open_questions,
        }
    }

    /// Freezes the working set's facts together with the current plan and
    /// outstanding failures into a [`ContextSnapshot`].
    #[must_use]
    pub fn snapshot(
        working_set: &WorkingSet,
        current_plan: impl Into<String>,
        active_failures: Vec<String>,
    ) -> ContextSnapshot {
        let preserved_facts = working_set.facts.clone();
        ContextSnapshot {
            preserved_facts,
            current_plan: current_plan.into(),
            active_failures,
        }
    }
}
/// Errors raised while scanning a repository for context facts.
#[derive(Debug, Error)]
pub enum ContextError {
    /// An underlying filesystem read failed.
    #[error("failed to read repository context: {0}")]
    Io(#[from] std::io::Error),
    /// A `Cargo.toml` could not be parsed as TOML.
    #[error("failed to parse Cargo manifest {path}: {source}")]
    Manifest {
        path: PathBuf,
        source: toml::de::Error,
    },
    /// A discovered path failed `RepoPath` validation.
    #[error("failed to validate repository path: {0}")]
    InvalidRepoPath(#[from] DomainTypeError),
    /// A path resolved outside the workspace root being scanned.
    #[error("path `{0}` is outside the scanned workspace root")]
    ExternalWorkspacePath(PathBuf),
}
/// Appends the workspace-relative form of `candidate` to `read_order` when
/// the file exists, skipping paths that are already recorded.
///
/// Deduplication matters because the root `Cargo.toml` can be offered
/// twice during a scan: once up front, and again as the fallback "member"
/// manifest of a non-workspace repository — without the check it would
/// occupy two read-order (and budget) slots.
///
/// # Errors
/// Propagates [`ContextError`] from path relativization.
fn push_if_exists(
    read_order: &mut Vec<RepoPath>,
    root: &Path,
    candidate: &Path,
) -> Result<(), ContextError> {
    if candidate.exists()
        && let Some(relative) = relative_path(root, candidate)?
        && !read_order.contains(&relative)
    {
        read_order.push(relative);
    }
    Ok(())
}
fn push_directory_entries_if_exists(
read_order: &mut Vec<RepoPath>,
root: &Path,
directory: &Path,
) -> Result<(), ContextError> {
if !directory.is_dir() {
return Ok(());
}
let mut entries = fs::read_dir(directory)?
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.map(|entry| entry.path())
.collect::<Vec<_>>();
entries.sort();
for entry in entries {
push_if_exists(read_order, root, &entry)?;
}
Ok(())
}
/// Reads and parses a TOML manifest, wrapping parse failures with the
/// offending path.
fn parse_manifest(path: &Path) -> Result<Value, ContextError> {
    let raw = fs::read_to_string(path)?;
    match toml::from_str(&raw) {
        Ok(value) => Ok(value),
        Err(source) => Err(ContextError::Manifest {
            path: path.to_path_buf(),
            source,
        }),
    }
}
/// Resolves the manifest paths of all workspace members from the root
/// manifest's `[workspace].members` array. A repository without that
/// array is treated as a single crate rooted at `root`.
fn member_manifests(root: &Path, manifest: &Value) -> Vec<PathBuf> {
    let members = manifest
        .get("workspace")
        .and_then(Value::as_table)
        .and_then(|workspace| workspace.get("members"))
        .and_then(Value::as_array);
    match members {
        // Non-string entries are silently skipped.
        Some(entries) => entries
            .iter()
            .filter_map(Value::as_str)
            .map(|member| root.join(member).join("Cargo.toml"))
            .collect(),
        None => vec![root.join("Cargo.toml")],
    }
}
/// Extracts [`CrateFacts`] from one crate's manifest: package metadata,
/// dependency names, and whichever conventional entry points exist.
fn scan_crate(root: &Path, manifest_path: &Path) -> Result<CrateFacts, ContextError> {
    let manifest = parse_manifest(manifest_path)?;
    // Missing `[package]` degrades to an empty table (name/edition defaults).
    let package = manifest
        .get("package")
        .and_then(Value::as_table)
        .cloned()
        .unwrap_or_default();
    let dependencies = dependency_names(&manifest);
    let crate_root = manifest_path.parent().unwrap_or(root).to_path_buf();
    // Record only the conventional entry points that actually exist on disk.
    let mut source_files = Vec::new();
    for relative in ["src/lib.rs", "src/main.rs"] {
        if let Some(path) = relative_path(root, &crate_root.join(relative))? {
            source_files.push(path);
        }
    }
    let name = package
        .get("name")
        .and_then(Value::as_str)
        .unwrap_or("unknown")
        .to_owned();
    let edition = package
        .get("edition")
        .and_then(Value::as_str)
        .unwrap_or("2024")
        .to_owned();
    // The manifest itself must be inside the workspace root.
    let manifest_repo_path = relative_path(root, manifest_path)?
        .ok_or_else(|| ContextError::ExternalWorkspacePath(manifest_path.to_path_buf()))?;
    Ok(CrateFacts {
        name,
        manifest_path: manifest_repo_path,
        edition,
        dependencies,
        source_files,
    })
}
/// Collects the keys of the manifest's `[dependencies]` table; a missing
/// table yields an empty set.
fn dependency_names(manifest: &Value) -> BTreeSet<String> {
    match manifest.get("dependencies").and_then(Value::as_table) {
        Some(table) => table.keys().cloned().collect(),
        None => BTreeSet::new(),
    }
}
/// Infers the async runtime from dependency names; `tokio` wins over
/// `async-std` when both are present.
fn infer_async_model(dependencies: &BTreeSet<String>) -> AsyncModel {
    // An empty set gives no signal either way.
    if dependencies.is_empty() {
        return AsyncModel::Unknown;
    }
    if dependencies.contains("tokio") {
        return AsyncModel::Tokio;
    }
    if dependencies.contains("async-std") {
        return AsyncModel::AsyncStd;
    }
    AsyncModel::NoneKnown
}
/// Infers the error-handling style from dependency names; `thiserror`
/// wins over `anyhow` when both are present.
fn infer_error_style(dependencies: &BTreeSet<String>) -> ErrorStyle {
    // An empty set gives no signal either way.
    if dependencies.is_empty() {
        return ErrorStyle::Unknown;
    }
    if dependencies.contains("thiserror") {
        return ErrorStyle::ThisError;
    }
    if dependencies.contains("anyhow") {
        return ErrorStyle::Anyhow;
    }
    ErrorStyle::Standard
}
/// Infers the logging style from dependency names; `tracing` wins over
/// `log` when both are present.
fn infer_logging_style(dependencies: &BTreeSet<String>) -> LoggingStyle {
    // An empty set gives no signal either way.
    if dependencies.is_empty() {
        return LoggingStyle::Unknown;
    }
    if dependencies.contains("tracing") {
        return LoggingStyle::Tracing;
    }
    if dependencies.contains("log") {
        return LoggingStyle::Log;
    }
    LoggingStyle::NoneKnown
}
/// Infers the testing style from dependency names; `trycmd` wins over
/// `assert_cmd` when both are present.
fn infer_test_style(dependencies: &BTreeSet<String>) -> TestStyle {
    // An empty set gives no signal either way.
    if dependencies.is_empty() {
        return TestStyle::Unknown;
    }
    if dependencies.contains("trycmd") {
        return TestStyle::Trycmd;
    }
    if dependencies.contains("assert_cmd") {
        return TestStyle::AssertCmd;
    }
    TestStyle::Standard
}
/// Infers the CLI style from dependency names; `clap` wins over
/// `pico-args` when both are present.
fn infer_cli_style(dependencies: &BTreeSet<String>) -> CliStyle {
    // An empty set gives no signal either way.
    if dependencies.is_empty() {
        return CliStyle::Unknown;
    }
    if dependencies.contains("clap") {
        return CliStyle::Clap;
    }
    if dependencies.contains("pico-args") {
        return CliStyle::PicoArgs;
    }
    CliStyle::NoneKnown
}
/// Converts an absolute candidate into a workspace-relative [`RepoPath`].
/// Returns `Ok(None)` when the file does not exist, and
/// `ExternalWorkspacePath` when it exists but lies outside `root`.
fn relative_path(root: &Path, candidate: &Path) -> Result<Option<RepoPath>, ContextError> {
    if !candidate.exists() {
        return Ok(None);
    }
    let stripped = candidate
        .strip_prefix(root)
        .map_err(|_| ContextError::ExternalWorkspacePath(candidate.to_path_buf()))?;
    let repo_path = RepoPath::new(stripped.display().to_string())?;
    Ok(Some(repo_path))
}
#[cfg(test)]
mod tests {
    use std::fs;
    use agent_domain::RepoPath;
    use tempfile::tempdir;
    use super::{RepoModeler, WorkspaceKind};

    /// Test helper: builds a `RepoPath`, panicking with context on failure.
    fn repo_path(value: &str) -> RepoPath {
        match RepoPath::new(value) {
            Ok(path) => path,
            Err(error) => panic!("repo path should be valid in test: {error}"),
        }
    }

    /// End-to-end scan over a temp fixture: a one-member workspace with an
    /// AGENTS.md contract and an `.agent/modes` policy file.
    #[test]
    fn repo_modeler_discovers_workspace_members() {
        let tempdir = tempdir().expect("tempdir should be created for context test");
        let root = tempdir.path();
        // Lay out the fixture: member crate source tree and a mode directory.
        fs::create_dir_all(root.join("crates").join("app").join("src"))
            .expect("crate source directory should be created for context test");
        fs::create_dir_all(root.join(".agent").join("modes"))
            .expect("mode directory should be created for context test");
        fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/app\"]\n",
        )
        .expect("workspace manifest should be written for context test");
        // The member depends on clap so CLI-style inference has a signal.
        fs::write(
            root.join("crates").join("app").join("Cargo.toml"),
            "[package]\nname = \"app\"\nversion = \"0.1.0\"\nedition = \"2024\"\n\n[dependencies]\nclap = \"4\"\n",
        )
        .expect("crate manifest should be written for context test");
        fs::write(
            root.join("crates").join("app").join("src").join("main.rs"),
            "fn main() {}\n",
        )
        .expect("crate source should be written for context test");
        fs::write(root.join("AGENTS.md"), "# Rules\n")
            .expect("agents contract should be written for context test");
        fs::write(
            root.join(".agent").join("modes").join("architect.yaml"),
            "slug: architect\npurpose: read only\n",
        )
        .expect("mode file should be written for context test");

        let model = RepoModeler::scan(root).expect("repo model should scan");
        // One workspace member still counts as SingleCrate.
        assert_eq!(model.workspace_kind, WorkspaceKind::SingleCrate);
        assert_eq!(model.crates.len(), 1);
        assert_eq!(model.crates[0].name, "app");
        // Contract and policy files must appear in the read order.
        assert!(model.read_order.contains(&repo_path("AGENTS.md")));
        assert!(
            model
                .read_order
                .contains(&repo_path(".agent/modes/architect.yaml"))
        );
    }
}