1use std::{
27 collections::BTreeSet,
28 fs,
29 path::{Path, PathBuf},
30};
31
32use agent_domain::{DependencyPolicy, DomainTypeError, RepoPath, TrustLevel};
33use serde::{Deserialize, Serialize};
34use thiserror::Error;
35use toml::Value;
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39pub enum WorkspaceKind {
40 SingleCrate,
42 MultiCrate,
44}
45
46#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
48pub struct CrateFacts {
49 pub name: String,
51 pub manifest_path: RepoPath,
53 pub edition: String,
55 pub dependencies: BTreeSet<String>,
57 pub source_files: Vec<RepoPath>,
59}
60
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
63pub struct ToolchainFacts {
64 pub rust_toolchain_path: Option<RepoPath>,
66 pub cargo_config_path: Option<RepoPath>,
68 pub ci_workflows: Vec<RepoPath>,
70}
71
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum AsyncModel {
75 Tokio,
77 AsyncStd,
79 NoneKnown,
81 Unknown,
83}
84
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
87pub enum ErrorStyle {
88 ThisError,
90 Anyhow,
92 Standard,
94 Unknown,
96}
97
98#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
100pub enum LoggingStyle {
101 Tracing,
103 Log,
105 NoneKnown,
107 Unknown,
109}
110
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
113pub enum TestStyle {
114 Trycmd,
116 AssertCmd,
118 Standard,
120 Unknown,
122}
123
124#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
126pub enum CliStyle {
127 Clap,
129 PicoArgs,
131 NoneKnown,
133 Unknown,
135}
136
137#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
139pub struct ApiBoundary {
140 pub crate_name: String,
142 pub public_paths: Vec<RepoPath>,
144}
145
146#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
148pub struct RepoFact {
149 pub subject: String,
151 pub detail: String,
153 pub trust_level: TrustLevel,
155}
156
157#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
159pub struct OpenQuestion {
160 pub question: String,
162}
163
164#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
166pub struct WorkingSet {
167 pub files: Vec<RepoPath>,
169 pub symbols: Vec<String>,
171 pub facts: Vec<RepoFact>,
173 pub open_questions: Vec<OpenQuestion>,
175}
176
177#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
179pub struct ContextBudget {
180 pub max_files: u16,
182 pub max_lines: u32,
184 pub max_tool_results: u16,
186}
187
188impl Default for ContextBudget {
189 fn default() -> Self {
190 Self {
191 max_files: 16,
192 max_lines: 800,
193 max_tool_results: 12,
194 }
195 }
196}
197
198#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct ContextSnapshot {
201 pub preserved_facts: Vec<RepoFact>,
203 pub current_plan: String,
205 pub active_failures: Vec<String>,
207}
208
209#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
211pub struct RepoModel {
212 pub workspace_kind: WorkspaceKind,
214 pub crates: Vec<CrateFacts>,
216 pub edition: String,
218 pub toolchain: ToolchainFacts,
220 pub async_model: AsyncModel,
222 pub error_style: ErrorStyle,
224 pub logging_style: LoggingStyle,
226 pub test_style: TestStyle,
228 pub cli_style: CliStyle,
230 pub dependency_policy: DependencyPolicy,
232 pub public_api_boundaries: Vec<ApiBoundary>,
234 pub read_order: Vec<RepoPath>,
236}
237
238#[derive(Debug, Default)]
240pub struct RepoModeler;
241
242impl RepoModeler {
243 pub fn scan(root: &Path) -> Result<RepoModel, ContextError> {
268 let root_manifest = root.join("Cargo.toml");
269 let mut read_order = Vec::new();
270 push_if_exists(&mut read_order, root, &root_manifest)?;
271 push_if_exists(&mut read_order, root, &root.join("rust-toolchain.toml"))?;
272 push_if_exists(
273 &mut read_order,
274 root,
275 &root.join(".cargo").join("config.toml"),
276 )?;
277
278 let workflow_dir = root.join(".github").join("workflows");
279 if workflow_dir.is_dir() {
280 let mut workflow_paths = fs::read_dir(&workflow_dir)?
281 .collect::<Result<Vec<_>, _>>()?
282 .into_iter()
283 .map(|entry| entry.path())
284 .collect::<Vec<_>>();
285 workflow_paths.sort();
286 for path in workflow_paths {
287 push_if_exists(&mut read_order, root, &path)?;
288 }
289 }
290
291 push_if_exists(&mut read_order, root, &root.join("AGENTS.md"))?;
292 for directory in ["rules", "path-rules", "modes", "approvals"] {
293 push_directory_entries_if_exists(
294 &mut read_order,
295 root,
296 &root.join(".agent").join(directory),
297 )?;
298 }
299 for candidate in ["README.md", "README"] {
300 push_if_exists(&mut read_order, root, &root.join(candidate))?;
301 }
302
303 let root_value = parse_manifest(&root_manifest)?;
304 let member_manifest_paths = member_manifests(root, &root_value);
305 let mut crates = Vec::new();
306 for manifest in &member_manifest_paths {
307 push_if_exists(&mut read_order, root, manifest)?;
308 crates.push(scan_crate(root, manifest)?);
309 }
310
311 let all_dependencies = crates
312 .iter()
313 .flat_map(|facts| facts.dependencies.iter().cloned())
314 .collect::<BTreeSet<_>>();
315
316 let workspace_kind = if crates.len() > 1 {
317 WorkspaceKind::MultiCrate
318 } else {
319 WorkspaceKind::SingleCrate
320 };
321
322 let toolchain = ToolchainFacts {
323 rust_toolchain_path: relative_path(root, &root.join("rust-toolchain.toml"))?,
324 cargo_config_path: relative_path(root, &root.join(".cargo").join("config.toml"))?,
325 ci_workflows: read_order
326 .iter()
327 .filter(|path| path.as_str().starts_with(".github/workflows/"))
328 .cloned()
329 .collect(),
330 };
331
332 let public_api_boundaries = crates
333 .iter()
334 .filter(|facts| {
335 facts
336 .source_files
337 .iter()
338 .any(|path| path.as_str().ends_with("/src/lib.rs"))
339 })
340 .map(|facts| ApiBoundary {
341 crate_name: facts.name.clone(),
342 public_paths: facts
343 .source_files
344 .iter()
345 .filter(|path| path.as_str().ends_with("/src/lib.rs"))
346 .cloned()
347 .collect(),
348 })
349 .collect();
350
351 let edition = crates
352 .first()
353 .map(|facts| facts.edition.clone())
354 .unwrap_or_else(|| "2024".to_owned());
355
356 Ok(RepoModel {
357 workspace_kind,
358 crates,
359 edition,
360 toolchain,
361 async_model: infer_async_model(&all_dependencies),
362 error_style: infer_error_style(&all_dependencies),
363 logging_style: infer_logging_style(&all_dependencies),
364 test_style: infer_test_style(&all_dependencies),
365 cli_style: infer_cli_style(&all_dependencies),
366 dependency_policy: DependencyPolicy::AllowApproved,
367 public_api_boundaries,
368 read_order,
369 })
370 }
371}
372
373#[derive(Debug, Default)]
375pub struct ContextBuilder;
376
377impl ContextBuilder {
378 #[must_use]
380 pub fn build(repo_model: &RepoModel, budget: ContextBudget) -> WorkingSet {
381 let mut files = repo_model.read_order.clone();
382 for facts in &repo_model.crates {
383 for source_file in &facts.source_files {
384 if !files.contains(source_file) {
385 files.push(source_file.clone());
386 }
387 }
388 }
389 files.truncate(usize::from(budget.max_files));
390
391 let facts = vec![
392 RepoFact {
393 subject: "workspace_kind".to_owned(),
394 detail: format!("{:?}", repo_model.workspace_kind),
395 trust_level: TrustLevel::RepoCode,
396 },
397 RepoFact {
398 subject: "crate_count".to_owned(),
399 detail: repo_model.crates.len().to_string(),
400 trust_level: TrustLevel::RepoCode,
401 },
402 RepoFact {
403 subject: "cli_style".to_owned(),
404 detail: format!("{:?}", repo_model.cli_style),
405 trust_level: TrustLevel::RepoCode,
406 },
407 ];
408
409 let open_questions = if repo_model.public_api_boundaries.is_empty() {
410 vec![OpenQuestion {
411 question: "No library boundary was inferred; public API impact is an inference."
412 .to_owned(),
413 }]
414 } else {
415 Vec::new()
416 };
417
418 WorkingSet {
419 files,
420 symbols: repo_model
421 .crates
422 .iter()
423 .map(|facts| facts.name.clone())
424 .collect(),
425 facts,
426 open_questions,
427 }
428 }
429
430 #[must_use]
432 pub fn snapshot(
433 working_set: &WorkingSet,
434 current_plan: impl Into<String>,
435 active_failures: Vec<String>,
436 ) -> ContextSnapshot {
437 ContextSnapshot {
438 preserved_facts: working_set.facts.clone(),
439 current_plan: current_plan.into(),
440 active_failures,
441 }
442 }
443}
444
445#[derive(Debug, Error)]
447pub enum ContextError {
448 #[error("failed to read repository context: {0}")]
450 Io(#[from] std::io::Error),
451 #[error("failed to parse Cargo manifest {path}: {source}")]
453 Manifest {
454 path: PathBuf,
456 source: toml::de::Error,
458 },
459 #[error("failed to validate repository path: {0}")]
461 InvalidRepoPath(#[from] DomainTypeError),
462 #[error("path `{0}` is outside the scanned workspace root")]
464 ExternalWorkspacePath(PathBuf),
465}
466
467fn push_if_exists(
468 read_order: &mut Vec<RepoPath>,
469 root: &Path,
470 candidate: &Path,
471) -> Result<(), ContextError> {
472 if candidate.exists()
473 && let Some(relative) = relative_path(root, candidate)?
474 {
475 read_order.push(relative);
476 }
477 Ok(())
478}
479
480fn push_directory_entries_if_exists(
481 read_order: &mut Vec<RepoPath>,
482 root: &Path,
483 directory: &Path,
484) -> Result<(), ContextError> {
485 if !directory.is_dir() {
486 return Ok(());
487 }
488
489 let mut entries = fs::read_dir(directory)?
490 .collect::<Result<Vec<_>, _>>()?
491 .into_iter()
492 .map(|entry| entry.path())
493 .collect::<Vec<_>>();
494 entries.sort();
495
496 for entry in entries {
497 push_if_exists(read_order, root, &entry)?;
498 }
499
500 Ok(())
501}
502
503fn parse_manifest(path: &Path) -> Result<Value, ContextError> {
504 let raw = fs::read_to_string(path)?;
505 toml::from_str(&raw).map_err(|source| ContextError::Manifest {
506 path: path.to_path_buf(),
507 source,
508 })
509}
510
511fn member_manifests(root: &Path, manifest: &Value) -> Vec<PathBuf> {
512 manifest
513 .get("workspace")
514 .and_then(Value::as_table)
515 .and_then(|workspace| workspace.get("members"))
516 .and_then(Value::as_array)
517 .map(|members| {
518 members
519 .iter()
520 .filter_map(Value::as_str)
521 .map(|member| root.join(member).join("Cargo.toml"))
522 .collect()
523 })
524 .unwrap_or_else(|| vec![root.join("Cargo.toml")])
525}
526
527fn scan_crate(root: &Path, manifest_path: &Path) -> Result<CrateFacts, ContextError> {
528 let manifest = parse_manifest(manifest_path)?;
529 let package = manifest
530 .get("package")
531 .and_then(Value::as_table)
532 .cloned()
533 .unwrap_or_default();
534 let dependencies = dependency_names(&manifest);
535 let crate_root = manifest_path.parent().unwrap_or(root).to_path_buf();
536
537 let source_files = ["src/lib.rs", "src/main.rs"]
538 .into_iter()
539 .map(|relative| crate_root.join(relative))
540 .filter_map(|path| relative_path(root, &path).transpose())
541 .collect::<Result<Vec<_>, _>>()?;
542
543 Ok(CrateFacts {
544 name: package
545 .get("name")
546 .and_then(Value::as_str)
547 .unwrap_or("unknown")
548 .to_owned(),
549 manifest_path: relative_path(root, manifest_path)?
550 .ok_or_else(|| ContextError::ExternalWorkspacePath(manifest_path.to_path_buf()))?,
551 edition: package
552 .get("edition")
553 .and_then(Value::as_str)
554 .unwrap_or("2024")
555 .to_owned(),
556 dependencies,
557 source_files,
558 })
559}
560
561fn dependency_names(manifest: &Value) -> BTreeSet<String> {
562 manifest
563 .get("dependencies")
564 .and_then(Value::as_table)
565 .map(|dependencies| dependencies.keys().cloned().collect())
566 .unwrap_or_default()
567}
568
569fn infer_async_model(dependencies: &BTreeSet<String>) -> AsyncModel {
570 if dependencies.contains("tokio") {
571 AsyncModel::Tokio
572 } else if dependencies.contains("async-std") {
573 AsyncModel::AsyncStd
574 } else if dependencies.is_empty() {
575 AsyncModel::Unknown
576 } else {
577 AsyncModel::NoneKnown
578 }
579}
580
581fn infer_error_style(dependencies: &BTreeSet<String>) -> ErrorStyle {
582 if dependencies.contains("thiserror") {
583 ErrorStyle::ThisError
584 } else if dependencies.contains("anyhow") {
585 ErrorStyle::Anyhow
586 } else if dependencies.is_empty() {
587 ErrorStyle::Unknown
588 } else {
589 ErrorStyle::Standard
590 }
591}
592
593fn infer_logging_style(dependencies: &BTreeSet<String>) -> LoggingStyle {
594 if dependencies.contains("tracing") {
595 LoggingStyle::Tracing
596 } else if dependencies.contains("log") {
597 LoggingStyle::Log
598 } else if dependencies.is_empty() {
599 LoggingStyle::Unknown
600 } else {
601 LoggingStyle::NoneKnown
602 }
603}
604
605fn infer_test_style(dependencies: &BTreeSet<String>) -> TestStyle {
606 if dependencies.contains("trycmd") {
607 TestStyle::Trycmd
608 } else if dependencies.contains("assert_cmd") {
609 TestStyle::AssertCmd
610 } else if dependencies.is_empty() {
611 TestStyle::Unknown
612 } else {
613 TestStyle::Standard
614 }
615}
616
617fn infer_cli_style(dependencies: &BTreeSet<String>) -> CliStyle {
618 if dependencies.contains("clap") {
619 CliStyle::Clap
620 } else if dependencies.contains("pico-args") {
621 CliStyle::PicoArgs
622 } else if dependencies.is_empty() {
623 CliStyle::Unknown
624 } else {
625 CliStyle::NoneKnown
626 }
627}
628
629fn relative_path(root: &Path, candidate: &Path) -> Result<Option<RepoPath>, ContextError> {
630 if candidate.exists() {
631 let relative = candidate
632 .strip_prefix(root)
633 .map_err(|_| ContextError::ExternalWorkspacePath(candidate.to_path_buf()))?
634 .display()
635 .to_string();
636 Ok(Some(RepoPath::new(relative)?))
637 } else {
638 Ok(None)
639 }
640}
641
#[cfg(test)]
mod tests {
    use std::fs;

    use agent_domain::RepoPath;
    use tempfile::tempdir;

    use super::{RepoModeler, WorkspaceKind};

    /// Builds a `RepoPath`, panicking with a readable message when `value` is rejected.
    fn repo_path(value: &str) -> RepoPath {
        RepoPath::new(value)
            .unwrap_or_else(|error| panic!("repo path should be valid in test: {error}"))
    }

    /// A workspace with one member crate plus agent contract files is scanned
    /// as a single crate, and the contract files show up in the read order.
    #[test]
    fn repo_modeler_discovers_workspace_members() {
        let tempdir = tempdir().expect("tempdir should be created for context test");
        let root = tempdir.path();
        let app_dir = root.join("crates").join("app");
        let modes_dir = root.join(".agent").join("modes");

        fs::create_dir_all(app_dir.join("src"))
            .expect("crate source directory should be created for context test");
        fs::create_dir_all(&modes_dir)
            .expect("mode directory should be created for context test");

        fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/app\"]\n",
        )
        .expect("workspace manifest should be written for context test");
        fs::write(
            app_dir.join("Cargo.toml"),
            "[package]\nname = \"app\"\nversion = \"0.1.0\"\nedition = \"2024\"\n\n[dependencies]\nclap = \"4\"\n",
        )
        .expect("crate manifest should be written for context test");
        fs::write(app_dir.join("src").join("main.rs"), "fn main() {}\n")
            .expect("crate source should be written for context test");
        fs::write(root.join("AGENTS.md"), "# Rules\n")
            .expect("agents contract should be written for context test");
        fs::write(
            modes_dir.join("architect.yaml"),
            "slug: architect\npurpose: read only\n",
        )
        .expect("mode file should be written for context test");

        let model = RepoModeler::scan(root).expect("repo model should scan");

        assert_eq!(model.workspace_kind, WorkspaceKind::SingleCrate);
        assert_eq!(model.crates.len(), 1);
        assert_eq!(model.crates[0].name, "app");

        let agents_contract = repo_path("AGENTS.md");
        let architect_mode = repo_path(".agent/modes/architect.yaml");
        assert!(model.read_order.contains(&agents_contract));
        assert!(model.read_order.contains(&architect_mode));
    }
}