Skip to main content

agentkit_context/
lib.rs

//! Context loaders for workspace-local agent instructions.
//!
//! This crate discovers and loads `AGENTS.md` files (project-level
//! instructions) into [`agentkit_core::Item`]s with [`ItemKind::Context`]. The
//! resulting items slot directly into a transcript alongside system, user, and
//! assistant messages, so the agent loop and providers do not need a separate
//! context path.
//!
//! # Overview
//!
//! * [`AgentsMd`] -- walks ancestor directories to find `AGENTS.md` files.
//! * [`ContextLoader`] -- combines multiple [`ContextSource`] implementations
//!   and loads them in order.
//!
//! # Example
//!
//! ```rust,no_run
//! use agentkit_context::{AgentsMd, ContextLoader};
//!
//! # async fn run() -> Result<(), agentkit_context::ContextError> {
//! let items = ContextLoader::new()
//!     .with_source(AgentsMd::discover("."))
//!     .load()
//!     .await?;
//! # Ok(())
//! # }
//! ```
28
29use std::collections::BTreeSet;
30use std::path::{Path, PathBuf};
31
32use agentkit_core::{Item, ItemKind, MetadataMap, Part, TextPart};
33use async_trait::async_trait;
34use serde_json::Value;
35use thiserror::Error;
36
/// File name that [`AgentsMd::discover`] looks for unless it is overridden
/// with [`AgentsMd::with_file_name`].
const DEFAULT_AGENTS_FILE: &str = "AGENTS.md";
38
/// Selects how much of the ancestor chain [`AgentsMd`] reports when it
/// discovers `AGENTS.md` files.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum AgentsMdMode {
    /// Report only the closest match: the upward walk stops at the first
    /// directory that contains the file.
    Nearest,
    /// Report every match along the upward walk, ordered from the outermost
    /// directory to the innermost one.
    All,
}
49
50/// A source of context [`Item`]s.
51///
52/// Implement this trait to create custom context loaders that can be plugged
53/// into a [`ContextLoader`]. Each call to [`load`](ContextSource::load) should
54/// return zero or more [`Item`]s with [`ItemKind::Context`].
55#[async_trait]
56pub trait ContextSource: Send + Sync {
57    /// Load context items from this source.
58    ///
59    /// # Errors
60    ///
61    /// Returns [`ContextError`] if the underlying filesystem operations fail.
62    async fn load(&self) -> Result<Vec<Item>, ContextError>;
63}
64
/// Composable loader that gathers context [`Item`]s from multiple
/// [`ContextSource`] implementations.
///
/// Sources are loaded one after another, in the order they were added, and
/// the items they return are concatenated into a single `Vec<Item>`. These
/// items carry [`ItemKind::Context`] and can be prepended to the transcript
/// before the user message.
///
/// # Example
///
/// ```rust,no_run
/// use agentkit_context::{AgentsMd, ContextLoader};
///
/// # async fn run() -> Result<(), agentkit_context::ContextError> {
/// let items = ContextLoader::new()
///     .with_source(AgentsMd::discover("."))
///     .load()
///     .await?;
///
/// println!("loaded {} context items", items.len());
/// # Ok(())
/// # }
/// ```
#[derive(Default)]
pub struct ContextLoader {
    // Registered sources, kept in insertion order; `load` visits them in
    // exactly this order.
    sources: Vec<Box<dyn ContextSource>>,
}
92
93impl ContextLoader {
94    /// Create an empty loader with no sources.
95    pub fn new() -> Self {
96        Self::default()
97    }
98
99    /// Add a [`ContextSource`] to this loader.
100    ///
101    /// Sources are loaded in the order they are added. This method consumes
102    /// and returns `self` so calls can be chained.
103    pub fn with_source(mut self, source: impl ContextSource + 'static) -> Self {
104        self.sources.push(Box::new(source));
105        self
106    }
107
108    /// Load all registered sources and return a combined list of context
109    /// [`Item`]s.
110    ///
111    /// # Errors
112    ///
113    /// Returns the first [`ContextError`] encountered while loading. Sources
114    /// that appear before the failing source will have already been loaded.
115    pub async fn load(&self) -> Result<Vec<Item>, ContextError> {
116        let mut items = Vec::new();
117
118        for source in &self.sources {
119            items.extend(source.load().await?);
120        }
121
122        Ok(items)
123    }
124}
125
126/// Discovers and loads `AGENTS.md` files by walking ancestor directories.
127///
128/// `AgentsMd` is the primary way to inject project-level instructions into an
129/// agent session. It walks upward from a given starting directory, collecting
130/// `AGENTS.md` files according to the configured [`AgentsMdMode`]. Explicit
131/// paths and extra search directories can be added for cases that fall outside
132/// simple ancestor discovery.
133///
134/// Loaded items carry metadata under the `agentkit.context.*` namespace:
135///
136/// | Key                        | Value                         |
137/// |----------------------------|-------------------------------|
138/// | `agentkit.context.source`  | `"agents_md"`                 |
139/// | `agentkit.context.path`    | Filesystem path of the file   |
140///
141/// # Example
142///
143/// ```rust,no_run
144/// use agentkit_context::AgentsMd;
145/// use agentkit_context::ContextSource; // for `.load()`
146///
147/// # async fn run() -> Result<(), agentkit_context::ContextError> {
148/// // Find the nearest AGENTS.md starting from the current directory.
149/// let items = AgentsMd::discover(".").load().await?;
150///
151/// // Or collect all ancestor AGENTS.md files, with an extra search dir.
152/// let items = AgentsMd::discover_all(".")
153///     .with_search_dir("./.agent")
154///     .load()
155///     .await?;
156/// # Ok(())
157/// # }
158/// ```
159#[derive(Clone, Debug)]
160pub struct AgentsMd {
161    start_dir: PathBuf,
162    mode: AgentsMdMode,
163    file_name: String,
164    explicit_paths: Vec<PathBuf>,
165    search_dirs: Vec<PathBuf>,
166}
167
168impl AgentsMd {
169    /// Create a new `AgentsMd` that searches for the nearest `AGENTS.md`
170    /// starting from `start_dir` and walking upward.
171    ///
172    /// This uses [`AgentsMdMode::Nearest`] by default. Call
173    /// [`with_mode`](Self::with_mode) or use [`discover_all`](Self::discover_all)
174    /// to collect every ancestor match instead.
175    pub fn discover(start_dir: impl Into<PathBuf>) -> Self {
176        Self {
177            start_dir: start_dir.into(),
178            mode: AgentsMdMode::Nearest,
179            file_name: DEFAULT_AGENTS_FILE.into(),
180            explicit_paths: Vec::new(),
181            search_dirs: Vec::new(),
182        }
183    }
184
185    /// Shorthand for `AgentsMd::discover(start_dir).with_mode(AgentsMdMode::All)`.
186    ///
187    /// Collects every `AGENTS.md` from the filesystem root down to `start_dir`,
188    /// ordered outermost-first so that more specific instructions appear last.
189    pub fn discover_all(start_dir: impl Into<PathBuf>) -> Self {
190        Self::discover(start_dir).with_mode(AgentsMdMode::All)
191    }
192
193    /// Set the discovery mode.
194    ///
195    /// See [`AgentsMdMode`] for the available options.
196    pub fn with_mode(mut self, mode: AgentsMdMode) -> Self {
197        self.mode = mode;
198        self
199    }
200
201    /// Override the file name to look for (default: `AGENTS.md`).
202    ///
203    /// Useful when a project uses a different convention such as `CLAUDE.md`.
204    pub fn with_file_name(mut self, file_name: impl Into<String>) -> Self {
205        self.file_name = file_name.into();
206        self
207    }
208
209    /// Add an explicit file path to include.
210    ///
211    /// The path is checked for existence at load time; if it does not exist it
212    /// is silently skipped. Explicit paths are loaded before ancestor discovery
213    /// results.
214    pub fn with_path(mut self, path: impl Into<PathBuf>) -> Self {
215        self.explicit_paths.push(path.into());
216        self
217    }
218
219    /// Add a directory to search for the configured file name.
220    ///
221    /// Unlike ancestor discovery, this checks only the given directory (not its
222    /// ancestors). This is useful for well-known sidecar locations like
223    /// `.agent/` or `.config/`.
224    pub fn with_search_dir(mut self, dir: impl Into<PathBuf>) -> Self {
225        self.search_dirs.push(dir.into());
226        self
227    }
228
229    /// Resolve the first matching path without reading its contents.
230    ///
231    /// Returns `None` when no `AGENTS.md` file is found. This is a convenience
232    /// wrapper around [`resolve_all`](Self::resolve_all).
233    ///
234    /// # Errors
235    ///
236    /// Returns [`ContextError`] if a filesystem metadata check fails.
237    pub async fn resolve(&self) -> Result<Option<PathBuf>, ContextError> {
238        Ok(self.resolve_all().await?.into_iter().next())
239    }
240
241    /// Resolve all matching paths without reading their contents.
242    ///
243    /// The returned paths are deduplicated and ordered from outermost to
244    /// innermost. When the mode is [`AgentsMdMode::Nearest`], at most one path
245    /// is returned.
246    ///
247    /// # Errors
248    ///
249    /// Returns [`ContextError`] if a filesystem metadata check fails.
250    pub async fn resolve_all(&self) -> Result<Vec<PathBuf>, ContextError> {
251        let mut paths = Vec::new();
252
253        for path in &self.explicit_paths {
254            if path_exists(path).await? {
255                paths.push(path.clone());
256            }
257        }
258
259        for dir in &self.search_dirs {
260            let candidate = dir.join(&self.file_name);
261            if path_exists(&candidate).await? {
262                paths.push(candidate);
263            }
264        }
265
266        paths.extend(
267            find_in_ancestors_with_mode(
268                &self.start_dir,
269                &self.file_name,
270                self.mode == AgentsMdMode::All,
271            )
272            .await?,
273        );
274
275        let mut seen = BTreeSet::new();
276        paths.retain(|path| seen.insert(path.clone()));
277        if self.mode == AgentsMdMode::Nearest {
278            Ok(paths.into_iter().rev().take(1).collect())
279        } else {
280            Ok(paths)
281        }
282    }
283}
284
285#[async_trait]
286impl ContextSource for AgentsMd {
287    async fn load(&self) -> Result<Vec<Item>, ContextError> {
288        let paths = self.resolve_all().await?;
289        let mut items = Vec::with_capacity(paths.len());
290
291        for path in paths {
292            let body = async_fs::read_to_string(&path).await.map_err(|error| {
293                ContextError::ReadFailed {
294                    path: path.clone(),
295                    error,
296                }
297            })?;
298
299            items.push(context_item(
300                format!(
301                    "[Loaded AGENTS]\nPath: {}\n\n{}",
302                    path.display(),
303                    body.trim_end()
304                ),
305                metadata_for("agents_md", &path, None),
306            ));
307        }
308
309        Ok(items)
310    }
311}
312
313fn context_item(text: String, metadata: MetadataMap) -> Item {
314    Item {
315        id: None,
316        kind: ItemKind::Context,
317        parts: vec![Part::Text(TextPart {
318            text,
319            metadata: MetadataMap::new(),
320        })],
321        metadata,
322        usage: None,
323        finish_reason: None,
324        created_at: None,
325    }
326}
327
328fn metadata_for(source_kind: &str, path: &Path, name: Option<String>) -> MetadataMap {
329    let mut metadata = MetadataMap::new();
330    metadata.insert(
331        "agentkit.context.source".into(),
332        Value::String(source_kind.into()),
333    );
334    metadata.insert(
335        "agentkit.context.path".into(),
336        Value::String(path.display().to_string()),
337    );
338    if let Some(name) = name {
339        metadata.insert("agentkit.context.name".into(), Value::String(name));
340    }
341    metadata
342}
343
344async fn path_exists(path: &Path) -> Result<bool, ContextError> {
345    match async_fs::metadata(path).await {
346        Ok(_) => Ok(true),
347        Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(false),
348        Err(error) => Err(ContextError::InspectFailed {
349            path: path.to_path_buf(),
350            error,
351        }),
352    }
353}
354
355async fn find_in_ancestors_with_mode(
356    start_dir: &Path,
357    file_name: &str,
358    include_all: bool,
359) -> Result<Vec<PathBuf>, ContextError> {
360    let mut current = start_dir.to_path_buf();
361    let mut matches = Vec::new();
362
363    loop {
364        let candidate = current.join(file_name);
365        if path_exists(&candidate).await? {
366            matches.push(candidate);
367            if !include_all {
368                break;
369            }
370        }
371        let Some(parent) = current.parent() else {
372            break;
373        };
374        current = parent.to_path_buf();
375    }
376
377    matches.reverse();
378    Ok(matches)
379}
380
381/// Errors that can occur while discovering or reading context files.
382#[derive(Debug, Error)]
383pub enum ContextError {
384    /// A filesystem metadata or directory-listing operation failed.
385    ///
386    /// This typically means the path exists but is not accessible (permission
387    /// denied, broken symlink, etc.).
388    #[error("failed to inspect {path}: {error}")]
389    InspectFailed {
390        /// The path that could not be inspected.
391        path: PathBuf,
392        /// The underlying I/O error.
393        #[source]
394        error: std::io::Error,
395    },
396    /// Reading the contents of a discovered file failed.
397    #[error("failed to read {path}: {error}")]
398    ReadFailed {
399        /// The path that could not be read.
400        path: PathBuf,
401        /// The underlying I/O error.
402        #[source]
403        error: std::io::Error,
404    },
405}
406
#[cfg(test)]
mod tests {
    use std::time::{SystemTime, UNIX_EPOCH};

    use super::*;

    /// Nearest-mode discovery finds a file that lives two levels above the
    /// start directory and tags it with the `agents_md` source.
    #[tokio::test]
    async fn discovers_agents_file_in_ancestors() {
        let root = temp_path("agentkit-context-agents");
        let nested = root.join("nested/project");
        async_fs::create_dir_all(&nested).await.unwrap();
        let agents_path = root.join("AGENTS.md");
        async_fs::write(&agents_path, "project = lantern")
            .await
            .unwrap();

        let items = AgentsMd::discover(&nested).load().await.unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].kind, ItemKind::Context);
        assert_eq!(
            items[0].metadata.get("agentkit.context.source"),
            Some(&Value::String("agents_md".into()))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// All-mode discovery returns every ancestor match, outermost first.
    #[tokio::test]
    async fn discovers_all_agents_files_when_requested() {
        let root = temp_path("agentkit-context-agents-all");
        let nested = root.join("nested/project");
        async_fs::create_dir_all(&nested).await.unwrap();
        async_fs::write(root.join("AGENTS.md"), "project = lantern")
            .await
            .unwrap();
        async_fs::write(root.join("nested/AGENTS.md"), "team = orbit")
            .await
            .unwrap();

        let items = AgentsMd::discover_all(&nested).load().await.unwrap();
        assert_eq!(items.len(), 2);
        // The root-level file comes before the one in `nested/`.
        assert!(
            context_path(&items[0])
                .is_some_and(|path| !Path::new(path).ends_with("nested/AGENTS.md"))
        );
        assert!(
            context_path(&items[1])
                .is_some_and(|path| Path::new(path).ends_with("nested/AGENTS.md"))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// A file in an extra search directory is picked up even though it is not
    /// on the ancestor chain of the start directory.
    #[tokio::test]
    async fn loads_agents_from_explicit_search_paths() {
        let root = temp_path("agentkit-context-agents-explicit");
        let nested = root.join("nested/project");
        let shared = root.join("shared");
        async_fs::create_dir_all(&nested).await.unwrap();
        async_fs::create_dir_all(&shared).await.unwrap();
        async_fs::write(shared.join("AGENTS.md"), "policy = explicit")
            .await
            .unwrap();

        let items = AgentsMd::discover(&nested)
            .with_search_dir(&shared)
            .load()
            .await
            .unwrap();
        assert_eq!(items.len(), 1);
        // Compare path components rather than raw text so the check does not
        // depend on the platform's path separator.
        assert!(
            context_path(&items[0])
                .is_some_and(|path| Path::new(path).ends_with("shared/AGENTS.md"))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// Read the `agentkit.context.path` metadata entry of `item`, if present.
    fn context_path(item: &Item) -> Option<&str> {
        item.metadata
            .get("agentkit.context.path")
            .and_then(Value::as_str)
    }

    /// Build a per-test scratch directory path under the system temp dir,
    /// made unique by a nanosecond timestamp suffix.
    fn temp_path(prefix: &str) -> PathBuf {
        let suffix = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!("{prefix}-{suffix}"))
    }
}