Skip to main content

ferrify_context/
lib.rs

1//! Repository modeling and working-context selection.
2//!
3//! `agent-context` is the read-only exploration layer in Ferrify. It inspects a
4//! repository, records structural facts such as workspace members and toolchain
5//! files, and produces a bounded working set that later stages can use without
6//! carrying the entire repo into memory.
7//!
8//! The crate follows a structural-first read order. It looks at root manifests,
9//! toolchain configuration, CI files, repository policy, and only then expands
10//! into nearby code. That order matters because Ferrify treats current
11//! repository evidence as stronger than remembered conventions.
12//!
13//! # Examples
14//!
15//! ```no_run
16//! use agent_context::RepoModeler;
17//! use std::path::Path;
18//!
19//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
20//! let model = RepoModeler::scan(Path::new("."))?;
21//! assert!(!model.crates.is_empty());
22//! # Ok(())
23//! # }
24//! ```
25
26use std::{
27    collections::BTreeSet,
28    fs,
29    path::{Path, PathBuf},
30};
31
32use agent_domain::{DependencyPolicy, DomainTypeError, RepoPath, TrustLevel};
33use serde::{Deserialize, Serialize};
34use thiserror::Error;
35use toml::Value;
36
37/// The broad workspace shape discovered during scanning.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39pub enum WorkspaceKind {
40    /// A single package rooted at the repository root.
41    SingleCrate,
42    /// A Cargo workspace with one or more member crates.
43    MultiCrate,
44}
45
46/// Facts collected for one Cargo crate.
47#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
48pub struct CrateFacts {
49    /// The crate name from `Cargo.toml`.
50    pub name: String,
51    /// The manifest path for the crate.
52    pub manifest_path: RepoPath,
53    /// The Rust edition declared by the crate.
54    pub edition: String,
55    /// Dependency names observed in the manifest.
56    pub dependencies: BTreeSet<String>,
57    /// Source files that anchor the crate in the working set.
58    pub source_files: Vec<RepoPath>,
59}
60
61/// Toolchain files and CI entry points discovered at the root.
62#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
63pub struct ToolchainFacts {
64    /// `rust-toolchain.toml` when present.
65    pub rust_toolchain_path: Option<RepoPath>,
66    /// `.cargo/config.toml` when present.
67    pub cargo_config_path: Option<RepoPath>,
68    /// CI workflow files under `.github/workflows`.
69    pub ci_workflows: Vec<RepoPath>,
70}
71
72/// The async runtime posture inferred from dependencies.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum AsyncModel {
75    /// The repo uses Tokio.
76    Tokio,
77    /// The repo uses async-std.
78    AsyncStd,
79    /// The repo does not appear to use an async runtime.
80    NoneKnown,
81    /// The runtime is not obvious yet.
82    Unknown,
83}
84
85/// The error handling style inferred from dependencies.
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
87pub enum ErrorStyle {
88    /// The repo prefers `thiserror`.
89    ThisError,
90    /// The repo prefers `anyhow`.
91    Anyhow,
92    /// The repo uses hand-rolled error types or standard errors.
93    Standard,
94    /// The style has not been established yet.
95    Unknown,
96}
97
98/// The logging style inferred from dependencies.
99#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
100pub enum LoggingStyle {
101    /// The repo uses `tracing`.
102    Tracing,
103    /// The repo uses the `log` facade.
104    Log,
105    /// The repo does not appear to use logging crates.
106    NoneKnown,
107    /// The style has not been established yet.
108    Unknown,
109}
110
111/// The testing style inferred from dependencies.
112#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
113pub enum TestStyle {
114    /// The repo uses `trycmd`.
115    Trycmd,
116    /// The repo uses `assert_cmd`.
117    AssertCmd,
118    /// The repo relies on standard Rust tests.
119    Standard,
120    /// The style has not been established yet.
121    Unknown,
122}
123
124/// The CLI implementation style inferred from dependencies.
125#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
126pub enum CliStyle {
127    /// The repo uses `clap`.
128    Clap,
129    /// The repo uses `pico-args`.
130    PicoArgs,
131    /// The repo does not appear to define a CLI yet.
132    NoneKnown,
133    /// The style has not been established yet.
134    Unknown,
135}
136
137/// A discovered public API boundary.
138#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
139pub struct ApiBoundary {
140    /// The crate that owns the boundary.
141    pub crate_name: String,
142    /// Paths that define the public boundary.
143    pub public_paths: Vec<RepoPath>,
144}
145
146/// A repository fact preserved across compaction.
147#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
148pub struct RepoFact {
149    /// The fact subject.
150    pub subject: String,
151    /// The fact detail.
152    pub detail: String,
153    /// The trust classification for the fact.
154    pub trust_level: TrustLevel,
155}
156
157/// An unresolved question that should remain visible to later stages.
158#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
159pub struct OpenQuestion {
160    /// The question text.
161    pub question: String,
162}
163
164/// The compact working set handed to planning.
165#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
166pub struct WorkingSet {
167    /// Files selected for the active context.
168    pub files: Vec<RepoPath>,
169    /// Symbol-like identifiers carried into planning.
170    pub symbols: Vec<String>,
171    /// Durable facts extracted from the repository.
172    pub facts: Vec<RepoFact>,
173    /// Open questions that still matter.
174    pub open_questions: Vec<OpenQuestion>,
175}
176
177/// Limits for context selection before compaction.
178#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
179pub struct ContextBudget {
180    /// Maximum number of files to retain.
181    pub max_files: u16,
182    /// Maximum number of lines to retain.
183    pub max_lines: u32,
184    /// Maximum number of tool results to retain.
185    pub max_tool_results: u16,
186}
187
188impl Default for ContextBudget {
189    fn default() -> Self {
190        Self {
191            max_files: 16,
192            max_lines: 800,
193            max_tool_results: 12,
194        }
195    }
196}
197
198/// The compacted context snapshot that survives between stages.
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct ContextSnapshot {
201    /// Facts preserved from earlier exploration.
202    pub preserved_facts: Vec<RepoFact>,
203    /// The active plan summary.
204    pub current_plan: String,
205    /// Active failures that need follow-up.
206    pub active_failures: Vec<String>,
207}
208
209/// The repository model built before planning.
210#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
211pub struct RepoModel {
212    /// Whether the repository is a single crate or a workspace.
213    pub workspace_kind: WorkspaceKind,
214    /// Facts for discovered crates.
215    pub crates: Vec<CrateFacts>,
216    /// The default edition inferred from the root or first crate.
217    pub edition: String,
218    /// Toolchain and CI facts discovered at the root.
219    pub toolchain: ToolchainFacts,
220    /// The inferred async runtime posture.
221    pub async_model: AsyncModel,
222    /// The inferred error handling style.
223    pub error_style: ErrorStyle,
224    /// The inferred logging style.
225    pub logging_style: LoggingStyle,
226    /// The inferred test style.
227    pub test_style: TestStyle,
228    /// The inferred CLI style.
229    pub cli_style: CliStyle,
230    /// The repository stance on dependency changes.
231    pub dependency_policy: DependencyPolicy,
232    /// Public API boundaries that should constrain planning.
233    pub public_api_boundaries: Vec<ApiBoundary>,
234    /// Files read in the prescribed discovery order.
235    pub read_order: Vec<RepoPath>,
236}
237
238/// Scans a repository root and builds a `RepoModel`.
239#[derive(Debug, Default)]
240pub struct RepoModeler;
241
242impl RepoModeler {
243    /// Scans the repository root using Ferrify's structural-first read order.
244    ///
245    /// The scan prefers root manifests, toolchain files, CI entry points, and
246    /// repository policy before it expands into crate-specific source files.
247    /// That ordering keeps planning grounded in the repo's declared structure.
248    ///
249    /// # Errors
250    ///
251    /// Returns [`ContextError`] when a required manifest cannot be read or
252    /// parsed, when repository-relative paths cannot be normalized into
253    /// [`RepoPath`], or when the filesystem cannot be traversed.
254    ///
255    /// # Examples
256    ///
257    /// ```no_run
258    /// use agent_context::RepoModeler;
259    /// use std::path::Path;
260    ///
261    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
262    /// let repo_model = RepoModeler::scan(Path::new("."))?;
263    /// println!("discovered {} crate(s)", repo_model.crates.len());
264    /// # Ok(())
265    /// # }
266    /// ```
267    pub fn scan(root: &Path) -> Result<RepoModel, ContextError> {
268        let root_manifest = root.join("Cargo.toml");
269        let mut read_order = Vec::new();
270        push_if_exists(&mut read_order, root, &root_manifest)?;
271        push_if_exists(&mut read_order, root, &root.join("rust-toolchain.toml"))?;
272        push_if_exists(
273            &mut read_order,
274            root,
275            &root.join(".cargo").join("config.toml"),
276        )?;
277
278        let workflow_dir = root.join(".github").join("workflows");
279        if workflow_dir.is_dir() {
280            let mut workflow_paths = fs::read_dir(&workflow_dir)?
281                .collect::<Result<Vec<_>, _>>()?
282                .into_iter()
283                .map(|entry| entry.path())
284                .collect::<Vec<_>>();
285            workflow_paths.sort();
286            for path in workflow_paths {
287                push_if_exists(&mut read_order, root, &path)?;
288            }
289        }
290
291        push_if_exists(&mut read_order, root, &root.join("AGENTS.md"))?;
292        for directory in ["rules", "path-rules", "modes", "approvals"] {
293            push_directory_entries_if_exists(
294                &mut read_order,
295                root,
296                &root.join(".agent").join(directory),
297            )?;
298        }
299        for candidate in ["README.md", "README"] {
300            push_if_exists(&mut read_order, root, &root.join(candidate))?;
301        }
302
303        let root_value = parse_manifest(&root_manifest)?;
304        let member_manifest_paths = member_manifests(root, &root_value);
305        let mut crates = Vec::new();
306        for manifest in &member_manifest_paths {
307            push_if_exists(&mut read_order, root, manifest)?;
308            crates.push(scan_crate(root, manifest)?);
309        }
310
311        let all_dependencies = crates
312            .iter()
313            .flat_map(|facts| facts.dependencies.iter().cloned())
314            .collect::<BTreeSet<_>>();
315
316        let workspace_kind = if crates.len() > 1 {
317            WorkspaceKind::MultiCrate
318        } else {
319            WorkspaceKind::SingleCrate
320        };
321
322        let toolchain = ToolchainFacts {
323            rust_toolchain_path: relative_path(root, &root.join("rust-toolchain.toml"))?,
324            cargo_config_path: relative_path(root, &root.join(".cargo").join("config.toml"))?,
325            ci_workflows: read_order
326                .iter()
327                .filter(|path| path.as_str().starts_with(".github/workflows/"))
328                .cloned()
329                .collect(),
330        };
331
332        let public_api_boundaries = crates
333            .iter()
334            .filter(|facts| {
335                facts
336                    .source_files
337                    .iter()
338                    .any(|path| path.as_str().ends_with("/src/lib.rs"))
339            })
340            .map(|facts| ApiBoundary {
341                crate_name: facts.name.clone(),
342                public_paths: facts
343                    .source_files
344                    .iter()
345                    .filter(|path| path.as_str().ends_with("/src/lib.rs"))
346                    .cloned()
347                    .collect(),
348            })
349            .collect();
350
351        let edition = crates
352            .first()
353            .map(|facts| facts.edition.clone())
354            .unwrap_or_else(|| "2024".to_owned());
355
356        Ok(RepoModel {
357            workspace_kind,
358            crates,
359            edition,
360            toolchain,
361            async_model: infer_async_model(&all_dependencies),
362            error_style: infer_error_style(&all_dependencies),
363            logging_style: infer_logging_style(&all_dependencies),
364            test_style: infer_test_style(&all_dependencies),
365            cli_style: infer_cli_style(&all_dependencies),
366            dependency_policy: DependencyPolicy::AllowApproved,
367            public_api_boundaries,
368            read_order,
369        })
370    }
371}
372
373/// Builds a compact working set from a `RepoModel`.
374#[derive(Debug, Default)]
375pub struct ContextBuilder;
376
377impl ContextBuilder {
378    /// Selects a bounded working set from the repository model.
379    #[must_use]
380    pub fn build(repo_model: &RepoModel, budget: ContextBudget) -> WorkingSet {
381        let mut files = repo_model.read_order.clone();
382        for facts in &repo_model.crates {
383            for source_file in &facts.source_files {
384                if !files.contains(source_file) {
385                    files.push(source_file.clone());
386                }
387            }
388        }
389        files.truncate(usize::from(budget.max_files));
390
391        let facts = vec![
392            RepoFact {
393                subject: "workspace_kind".to_owned(),
394                detail: format!("{:?}", repo_model.workspace_kind),
395                trust_level: TrustLevel::RepoCode,
396            },
397            RepoFact {
398                subject: "crate_count".to_owned(),
399                detail: repo_model.crates.len().to_string(),
400                trust_level: TrustLevel::RepoCode,
401            },
402            RepoFact {
403                subject: "cli_style".to_owned(),
404                detail: format!("{:?}", repo_model.cli_style),
405                trust_level: TrustLevel::RepoCode,
406            },
407        ];
408
409        let open_questions = if repo_model.public_api_boundaries.is_empty() {
410            vec![OpenQuestion {
411                question: "No library boundary was inferred; public API impact is an inference."
412                    .to_owned(),
413            }]
414        } else {
415            Vec::new()
416        };
417
418        WorkingSet {
419            files,
420            symbols: repo_model
421                .crates
422                .iter()
423                .map(|facts| facts.name.clone())
424                .collect(),
425            facts,
426            open_questions,
427        }
428    }
429
430    /// Compacts the current state into a durable snapshot.
431    #[must_use]
432    pub fn snapshot(
433        working_set: &WorkingSet,
434        current_plan: impl Into<String>,
435        active_failures: Vec<String>,
436    ) -> ContextSnapshot {
437        ContextSnapshot {
438            preserved_facts: working_set.facts.clone(),
439            current_plan: current_plan.into(),
440            active_failures,
441        }
442    }
443}
444
445/// Errors produced while scanning repository context.
446#[derive(Debug, Error)]
447pub enum ContextError {
448    /// Filesystem access failed.
449    #[error("failed to read repository context: {0}")]
450    Io(#[from] std::io::Error),
451    /// Manifest parsing failed.
452    #[error("failed to parse Cargo manifest {path}: {source}")]
453    Manifest {
454        /// The manifest path that failed to parse.
455        path: PathBuf,
456        /// The underlying parse error.
457        source: toml::de::Error,
458    },
459    /// A discovered repository path violated the domain path invariants.
460    #[error("failed to validate repository path: {0}")]
461    InvalidRepoPath(#[from] DomainTypeError),
462    /// A discovered file was not rooted under the scanned workspace.
463    #[error("path `{0}` is outside the scanned workspace root")]
464    ExternalWorkspacePath(PathBuf),
465}
466
467fn push_if_exists(
468    read_order: &mut Vec<RepoPath>,
469    root: &Path,
470    candidate: &Path,
471) -> Result<(), ContextError> {
472    if candidate.exists()
473        && let Some(relative) = relative_path(root, candidate)?
474    {
475        read_order.push(relative);
476    }
477    Ok(())
478}
479
480fn push_directory_entries_if_exists(
481    read_order: &mut Vec<RepoPath>,
482    root: &Path,
483    directory: &Path,
484) -> Result<(), ContextError> {
485    if !directory.is_dir() {
486        return Ok(());
487    }
488
489    let mut entries = fs::read_dir(directory)?
490        .collect::<Result<Vec<_>, _>>()?
491        .into_iter()
492        .map(|entry| entry.path())
493        .collect::<Vec<_>>();
494    entries.sort();
495
496    for entry in entries {
497        push_if_exists(read_order, root, &entry)?;
498    }
499
500    Ok(())
501}
502
503fn parse_manifest(path: &Path) -> Result<Value, ContextError> {
504    let raw = fs::read_to_string(path)?;
505    toml::from_str(&raw).map_err(|source| ContextError::Manifest {
506        path: path.to_path_buf(),
507        source,
508    })
509}
510
511fn member_manifests(root: &Path, manifest: &Value) -> Vec<PathBuf> {
512    manifest
513        .get("workspace")
514        .and_then(Value::as_table)
515        .and_then(|workspace| workspace.get("members"))
516        .and_then(Value::as_array)
517        .map(|members| {
518            members
519                .iter()
520                .filter_map(Value::as_str)
521                .map(|member| root.join(member).join("Cargo.toml"))
522                .collect()
523        })
524        .unwrap_or_else(|| vec![root.join("Cargo.toml")])
525}
526
527fn scan_crate(root: &Path, manifest_path: &Path) -> Result<CrateFacts, ContextError> {
528    let manifest = parse_manifest(manifest_path)?;
529    let package = manifest
530        .get("package")
531        .and_then(Value::as_table)
532        .cloned()
533        .unwrap_or_default();
534    let dependencies = dependency_names(&manifest);
535    let crate_root = manifest_path.parent().unwrap_or(root).to_path_buf();
536
537    let source_files = ["src/lib.rs", "src/main.rs"]
538        .into_iter()
539        .map(|relative| crate_root.join(relative))
540        .filter_map(|path| relative_path(root, &path).transpose())
541        .collect::<Result<Vec<_>, _>>()?;
542
543    Ok(CrateFacts {
544        name: package
545            .get("name")
546            .and_then(Value::as_str)
547            .unwrap_or("unknown")
548            .to_owned(),
549        manifest_path: relative_path(root, manifest_path)?
550            .ok_or_else(|| ContextError::ExternalWorkspacePath(manifest_path.to_path_buf()))?,
551        edition: package
552            .get("edition")
553            .and_then(Value::as_str)
554            .unwrap_or("2024")
555            .to_owned(),
556        dependencies,
557        source_files,
558    })
559}
560
561fn dependency_names(manifest: &Value) -> BTreeSet<String> {
562    manifest
563        .get("dependencies")
564        .and_then(Value::as_table)
565        .map(|dependencies| dependencies.keys().cloned().collect())
566        .unwrap_or_default()
567}
568
569fn infer_async_model(dependencies: &BTreeSet<String>) -> AsyncModel {
570    if dependencies.contains("tokio") {
571        AsyncModel::Tokio
572    } else if dependencies.contains("async-std") {
573        AsyncModel::AsyncStd
574    } else if dependencies.is_empty() {
575        AsyncModel::Unknown
576    } else {
577        AsyncModel::NoneKnown
578    }
579}
580
581fn infer_error_style(dependencies: &BTreeSet<String>) -> ErrorStyle {
582    if dependencies.contains("thiserror") {
583        ErrorStyle::ThisError
584    } else if dependencies.contains("anyhow") {
585        ErrorStyle::Anyhow
586    } else if dependencies.is_empty() {
587        ErrorStyle::Unknown
588    } else {
589        ErrorStyle::Standard
590    }
591}
592
593fn infer_logging_style(dependencies: &BTreeSet<String>) -> LoggingStyle {
594    if dependencies.contains("tracing") {
595        LoggingStyle::Tracing
596    } else if dependencies.contains("log") {
597        LoggingStyle::Log
598    } else if dependencies.is_empty() {
599        LoggingStyle::Unknown
600    } else {
601        LoggingStyle::NoneKnown
602    }
603}
604
605fn infer_test_style(dependencies: &BTreeSet<String>) -> TestStyle {
606    if dependencies.contains("trycmd") {
607        TestStyle::Trycmd
608    } else if dependencies.contains("assert_cmd") {
609        TestStyle::AssertCmd
610    } else if dependencies.is_empty() {
611        TestStyle::Unknown
612    } else {
613        TestStyle::Standard
614    }
615}
616
617fn infer_cli_style(dependencies: &BTreeSet<String>) -> CliStyle {
618    if dependencies.contains("clap") {
619        CliStyle::Clap
620    } else if dependencies.contains("pico-args") {
621        CliStyle::PicoArgs
622    } else if dependencies.is_empty() {
623        CliStyle::Unknown
624    } else {
625        CliStyle::NoneKnown
626    }
627}
628
629fn relative_path(root: &Path, candidate: &Path) -> Result<Option<RepoPath>, ContextError> {
630    if candidate.exists() {
631        let relative = candidate
632            .strip_prefix(root)
633            .map_err(|_| ContextError::ExternalWorkspacePath(candidate.to_path_buf()))?
634            .display()
635            .to_string();
636        Ok(Some(RepoPath::new(relative)?))
637    } else {
638        Ok(None)
639    }
640}
641
#[cfg(test)]
mod tests {
    use std::fs;

    use agent_domain::RepoPath;
    use tempfile::tempdir;

    use super::{RepoModeler, WorkspaceKind};

    /// Builds a `RepoPath` for assertions, panicking when the value is invalid.
    fn repo_path(value: &str) -> RepoPath {
        RepoPath::new(value)
            .unwrap_or_else(|error| panic!("repo path should be valid in test: {error}"))
    }

    /// A workspace listing one member is scanned into one crate, and the
    /// repository policy files show up in the read order.
    #[test]
    fn repo_modeler_discovers_workspace_members() {
        let tempdir = tempdir().expect("tempdir should be created for context test");
        let root = tempdir.path();
        let app_dir = root.join("crates").join("app");
        let app_src_dir = app_dir.join("src");
        let modes_dir = root.join(".agent").join("modes");

        // Lay out a one-member workspace with an agent contract and a mode.
        fs::create_dir_all(&app_src_dir)
            .expect("crate source directory should be created for context test");
        fs::create_dir_all(&modes_dir).expect("mode directory should be created for context test");
        fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/app\"]\n",
        )
        .expect("workspace manifest should be written for context test");
        fs::write(
            app_dir.join("Cargo.toml"),
            "[package]\nname = \"app\"\nversion = \"0.1.0\"\nedition = \"2024\"\n\n[dependencies]\nclap = \"4\"\n",
        )
        .expect("crate manifest should be written for context test");
        fs::write(app_src_dir.join("main.rs"), "fn main() {}\n")
            .expect("crate source should be written for context test");
        fs::write(root.join("AGENTS.md"), "# Rules\n")
            .expect("agents contract should be written for context test");
        fs::write(
            modes_dir.join("architect.yaml"),
            "slug: architect\npurpose: read only\n",
        )
        .expect("mode file should be written for context test");

        let model = RepoModeler::scan(root).expect("repo model should scan");

        // A single discovered crate is classified as `SingleCrate`.
        assert_eq!(model.workspace_kind, WorkspaceKind::SingleCrate);
        assert_eq!(model.crates.len(), 1);
        assert_eq!(model.crates[0].name, "app");

        let agents_contract = repo_path("AGENTS.md");
        let architect_mode = repo_path(".agent/modes/architect.yaml");
        assert!(model.read_order.contains(&agents_contract));
        assert!(model.read_order.contains(&architect_mode));
    }
}