agentkit_context/lib.rs
1//! Context loaders for workspace-local agent instructions.
2//!
3//! This crate discovers and loads `AGENTS.md` files (project-level
4//! instructions) into [`agentkit_core::Item`]s with [`ItemKind::Context`]. The
5//! resulting items slot directly into a transcript alongside system, user, and
6//! assistant messages, so the agent loop and providers do not need a separate
7//! context path.
8//!
9//! # Overview
10//!
11//! * [`AgentsMd`] -- walks ancestor directories to find `AGENTS.md` files.
12//! * [`ContextLoader`] -- combines multiple [`ContextSource`] implementations
13//! and loads them in order.
14//!
15//! # Example
16//!
17//! ```rust,no_run
18//! use agentkit_context::{AgentsMd, ContextLoader};
19//!
20//! # async fn run() -> Result<(), agentkit_context::ContextError> {
21//! let items = ContextLoader::new()
22//! .with_source(AgentsMd::discover("."))
23//! .load()
24//! .await?;
25//! # Ok(())
26//! # }
27//! ```
28
29use std::collections::BTreeSet;
30use std::path::{Path, PathBuf};
31
32use agentkit_core::{Item, ItemKind, MetadataMap, Part, TextPart};
33use async_trait::async_trait;
34use serde_json::Value;
35use thiserror::Error;
36
/// File name that [`AgentsMd::discover`] looks for unless it is replaced via
/// [`AgentsMd::with_file_name`].
const DEFAULT_AGENTS_FILE: &str = "AGENTS.md";
38
/// Controls how many `AGENTS.md` files [`AgentsMd`] returns during ancestor
/// discovery.
///
/// [`AgentsMd::discover`] starts in [`Nearest`](Self::Nearest) mode;
/// [`AgentsMd::discover_all`] and [`AgentsMd::with_mode`] select a different
/// one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AgentsMdMode {
    /// Stop at the first (nearest) `AGENTS.md` found while walking upward.
    ///
    /// In this mode [`AgentsMd::resolve_all`] additionally trims its combined
    /// result (explicit paths, search directories, ancestor match) down to a
    /// single path: the last one in that order.
    Nearest,
    /// Collect every `AGENTS.md` found while walking upward from the start
    /// directory, ordered from outermost to innermost.
    All,
}
49
/// A source of context [`Item`]s.
///
/// Implement this trait to create custom context loaders that can be plugged
/// into a [`ContextLoader`]. Each call to [`load`](ContextSource::load) should
/// return zero or more [`Item`]s with [`ItemKind::Context`].
///
/// [`AgentsMd`] implements this trait. The `Send + Sync` bound applies to
/// every implementor, including the boxed sources a [`ContextLoader`] stores.
#[async_trait]
pub trait ContextSource: Send + Sync {
    /// Load context items from this source.
    ///
    /// An empty vector is a valid result and means the source had nothing to
    /// contribute.
    ///
    /// # Errors
    ///
    /// Returns [`ContextError`] if the source cannot be inspected or read.
    async fn load(&self) -> Result<Vec<Item>, ContextError>;
}
64
/// Composable loader that gathers context [`Item`]s from multiple
/// [`ContextSource`] implementations.
///
/// Sources are loaded in the order they were added and the resulting items are
/// concatenated into a single `Vec<Item>`. These items carry
/// [`ItemKind::Context`] and can be prepended to the transcript before the
/// user message.
///
/// # Example
///
/// ```rust,no_run
/// use agentkit_context::{AgentsMd, ContextLoader};
///
/// # async fn run() -> Result<(), agentkit_context::ContextError> {
/// let items = ContextLoader::new()
///     .with_source(AgentsMd::discover("."))
///     .load()
///     .await?;
///
/// println!("loaded {} context items", items.len());
/// # Ok(())
/// # }
/// ```
#[derive(Default)]
pub struct ContextLoader {
    // Boxed trait objects so different source types can share one list; the
    // vector order is the order `load` visits them in.
    sources: Vec<Box<dyn ContextSource>>,
}
92
93impl ContextLoader {
94 /// Create an empty loader with no sources.
95 pub fn new() -> Self {
96 Self::default()
97 }
98
99 /// Add a [`ContextSource`] to this loader.
100 ///
101 /// Sources are loaded in the order they are added. This method consumes
102 /// and returns `self` so calls can be chained.
103 pub fn with_source(mut self, source: impl ContextSource + 'static) -> Self {
104 self.sources.push(Box::new(source));
105 self
106 }
107
108 /// Load all registered sources and return a combined list of context
109 /// [`Item`]s.
110 ///
111 /// # Errors
112 ///
113 /// Returns the first [`ContextError`] encountered while loading. Sources
114 /// that appear before the failing source will have already been loaded.
115 pub async fn load(&self) -> Result<Vec<Item>, ContextError> {
116 let mut items = Vec::new();
117
118 for source in &self.sources {
119 items.extend(source.load().await?);
120 }
121
122 Ok(items)
123 }
124}
125
/// Discovers and loads `AGENTS.md` files by walking ancestor directories.
///
/// `AgentsMd` is the primary way to inject project-level instructions into an
/// agent session. It walks upward from a given starting directory, collecting
/// `AGENTS.md` files according to the configured [`AgentsMdMode`]. Explicit
/// paths and extra search directories can be added for cases that fall outside
/// simple ancestor discovery.
///
/// Loaded items carry metadata under the `agentkit.context.*` namespace:
///
/// | Key                        | Value                         |
/// |----------------------------|-------------------------------|
/// | `agentkit.context.source`  | `"agents_md"`                 |
/// | `agentkit.context.path`    | Filesystem path of the file   |
///
/// # Example
///
/// ```rust,no_run
/// use agentkit_context::AgentsMd;
/// use agentkit_context::ContextSource; // for `.load()`
///
/// # async fn run() -> Result<(), agentkit_context::ContextError> {
/// // Find the nearest AGENTS.md starting from the current directory.
/// let items = AgentsMd::discover(".").load().await?;
///
/// // Or collect all ancestor AGENTS.md files, with an extra search dir.
/// let items = AgentsMd::discover_all(".")
///     .with_search_dir("./.agent")
///     .load()
///     .await?;
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct AgentsMd {
    // Directory where the upward walk begins.
    start_dir: PathBuf,
    // Whether the walk keeps the first match only or every match.
    mode: AgentsMdMode,
    // File name looked for in each directory (`AGENTS.md` by default).
    file_name: String,
    // Files added through `with_path`; checked for existence at resolve time.
    explicit_paths: Vec<PathBuf>,
    // Directories added through `with_search_dir`; only `<dir>/<file_name>`
    // is checked, not the directory's ancestors.
    search_dirs: Vec<PathBuf>,
}
167
168impl AgentsMd {
169 /// Create a new `AgentsMd` that searches for the nearest `AGENTS.md`
170 /// starting from `start_dir` and walking upward.
171 ///
172 /// This uses [`AgentsMdMode::Nearest`] by default. Call
173 /// [`with_mode`](Self::with_mode) or use [`discover_all`](Self::discover_all)
174 /// to collect every ancestor match instead.
175 pub fn discover(start_dir: impl Into<PathBuf>) -> Self {
176 Self {
177 start_dir: start_dir.into(),
178 mode: AgentsMdMode::Nearest,
179 file_name: DEFAULT_AGENTS_FILE.into(),
180 explicit_paths: Vec::new(),
181 search_dirs: Vec::new(),
182 }
183 }
184
185 /// Shorthand for `AgentsMd::discover(start_dir).with_mode(AgentsMdMode::All)`.
186 ///
187 /// Collects every `AGENTS.md` from the filesystem root down to `start_dir`,
188 /// ordered outermost-first so that more specific instructions appear last.
189 pub fn discover_all(start_dir: impl Into<PathBuf>) -> Self {
190 Self::discover(start_dir).with_mode(AgentsMdMode::All)
191 }
192
193 /// Set the discovery mode.
194 ///
195 /// See [`AgentsMdMode`] for the available options.
196 pub fn with_mode(mut self, mode: AgentsMdMode) -> Self {
197 self.mode = mode;
198 self
199 }
200
201 /// Override the file name to look for (default: `AGENTS.md`).
202 ///
203 /// Useful when a project uses a different convention such as `CLAUDE.md`.
204 pub fn with_file_name(mut self, file_name: impl Into<String>) -> Self {
205 self.file_name = file_name.into();
206 self
207 }
208
209 /// Add an explicit file path to include.
210 ///
211 /// The path is checked for existence at load time; if it does not exist it
212 /// is silently skipped. Explicit paths are loaded before ancestor discovery
213 /// results.
214 pub fn with_path(mut self, path: impl Into<PathBuf>) -> Self {
215 self.explicit_paths.push(path.into());
216 self
217 }
218
219 /// Add a directory to search for the configured file name.
220 ///
221 /// Unlike ancestor discovery, this checks only the given directory (not its
222 /// ancestors). This is useful for well-known sidecar locations like
223 /// `.agent/` or `.config/`.
224 pub fn with_search_dir(mut self, dir: impl Into<PathBuf>) -> Self {
225 self.search_dirs.push(dir.into());
226 self
227 }
228
229 /// Resolve the first matching path without reading its contents.
230 ///
231 /// Returns `None` when no `AGENTS.md` file is found. This is a convenience
232 /// wrapper around [`resolve_all`](Self::resolve_all).
233 ///
234 /// # Errors
235 ///
236 /// Returns [`ContextError`] if a filesystem metadata check fails.
237 pub async fn resolve(&self) -> Result<Option<PathBuf>, ContextError> {
238 Ok(self.resolve_all().await?.into_iter().next())
239 }
240
241 /// Resolve all matching paths without reading their contents.
242 ///
243 /// The returned paths are deduplicated and ordered from outermost to
244 /// innermost. When the mode is [`AgentsMdMode::Nearest`], at most one path
245 /// is returned.
246 ///
247 /// # Errors
248 ///
249 /// Returns [`ContextError`] if a filesystem metadata check fails.
250 pub async fn resolve_all(&self) -> Result<Vec<PathBuf>, ContextError> {
251 let mut paths = Vec::new();
252
253 for path in &self.explicit_paths {
254 if path_exists(path).await? {
255 paths.push(path.clone());
256 }
257 }
258
259 for dir in &self.search_dirs {
260 let candidate = dir.join(&self.file_name);
261 if path_exists(&candidate).await? {
262 paths.push(candidate);
263 }
264 }
265
266 paths.extend(
267 find_in_ancestors_with_mode(
268 &self.start_dir,
269 &self.file_name,
270 self.mode == AgentsMdMode::All,
271 )
272 .await?,
273 );
274
275 let mut seen = BTreeSet::new();
276 paths.retain(|path| seen.insert(path.clone()));
277 if self.mode == AgentsMdMode::Nearest {
278 Ok(paths.into_iter().rev().take(1).collect())
279 } else {
280 Ok(paths)
281 }
282 }
283}
284
285#[async_trait]
286impl ContextSource for AgentsMd {
287 async fn load(&self) -> Result<Vec<Item>, ContextError> {
288 let paths = self.resolve_all().await?;
289 let mut items = Vec::with_capacity(paths.len());
290
291 for path in paths {
292 let body = async_fs::read_to_string(&path).await.map_err(|error| {
293 ContextError::ReadFailed {
294 path: path.clone(),
295 error,
296 }
297 })?;
298
299 items.push(context_item(
300 format!(
301 "[Loaded AGENTS]\nPath: {}\n\n{}",
302 path.display(),
303 body.trim_end()
304 ),
305 metadata_for("agents_md", &path, None),
306 ));
307 }
308
309 Ok(items)
310 }
311}
312
313fn context_item(text: String, metadata: MetadataMap) -> Item {
314 Item {
315 id: None,
316 kind: ItemKind::Context,
317 parts: vec![Part::Text(TextPart {
318 text,
319 metadata: MetadataMap::new(),
320 })],
321 metadata,
322 }
323}
324
325fn metadata_for(source_kind: &str, path: &Path, name: Option<String>) -> MetadataMap {
326 let mut metadata = MetadataMap::new();
327 metadata.insert(
328 "agentkit.context.source".into(),
329 Value::String(source_kind.into()),
330 );
331 metadata.insert(
332 "agentkit.context.path".into(),
333 Value::String(path.display().to_string()),
334 );
335 if let Some(name) = name {
336 metadata.insert("agentkit.context.name".into(), Value::String(name));
337 }
338 metadata
339}
340
341async fn path_exists(path: &Path) -> Result<bool, ContextError> {
342 match async_fs::metadata(path).await {
343 Ok(_) => Ok(true),
344 Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(false),
345 Err(error) => Err(ContextError::InspectFailed {
346 path: path.to_path_buf(),
347 error,
348 }),
349 }
350}
351
352async fn find_in_ancestors_with_mode(
353 start_dir: &Path,
354 file_name: &str,
355 include_all: bool,
356) -> Result<Vec<PathBuf>, ContextError> {
357 let mut current = start_dir.to_path_buf();
358 let mut matches = Vec::new();
359
360 loop {
361 let candidate = current.join(file_name);
362 if path_exists(&candidate).await? {
363 matches.push(candidate);
364 if !include_all {
365 break;
366 }
367 }
368 let Some(parent) = current.parent() else {
369 break;
370 };
371 current = parent.to_path_buf();
372 }
373
374 matches.reverse();
375 Ok(matches)
376}
377
/// Errors that can occur while discovering or reading context files.
///
/// Both variants carry the offending path and the underlying
/// [`std::io::Error`], which is also exposed as the error's source.
#[derive(Debug, Error)]
pub enum ContextError {
    /// A filesystem metadata check failed.
    ///
    /// "Not found" errors never surface here: the existence check treats them
    /// as "file absent". This variant therefore means the path could not be
    /// inspected for some other reason (for example, permission denied).
    #[error("failed to inspect {path}: {error}")]
    InspectFailed {
        /// The path that could not be inspected.
        path: PathBuf,
        /// The underlying I/O error.
        #[source]
        error: std::io::Error,
    },
    /// Reading the contents of a discovered file failed.
    #[error("failed to read {path}: {error}")]
    ReadFailed {
        /// The path that could not be read.
        path: PathBuf,
        /// The underlying I/O error.
        #[source]
        error: std::io::Error,
    },
}
403
#[cfg(test)]
mod tests {
    use std::time::{SystemTime, UNIX_EPOCH};

    use super::*;

    /// Returns the `agentkit.context.path` metadata value of `item` as a path.
    fn context_path(item: &Item) -> Option<PathBuf> {
        item.metadata
            .get("agentkit.context.path")
            .and_then(Value::as_str)
            .map(PathBuf::from)
    }

    /// Nearest mode finds a file that lives two levels above the start dir.
    #[tokio::test]
    async fn discovers_agents_file_in_ancestors() {
        let root = temp_path("agentkit-context-agents");
        let nested = root.join("nested/project");
        async_fs::create_dir_all(&nested).await.unwrap();
        let agents_path = root.join("AGENTS.md");
        async_fs::write(&agents_path, "project = lantern")
            .await
            .unwrap();

        let items = AgentsMd::discover(&nested).load().await.unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].kind, ItemKind::Context);
        assert_eq!(
            items[0].metadata.get("agentkit.context.source"),
            Some(&Value::String("agents_md".into()))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// All mode returns every ancestor match, outermost first.
    #[tokio::test]
    async fn discovers_all_agents_files_when_requested() {
        let root = temp_path("agentkit-context-agents-all");
        let nested = root.join("nested/project");
        async_fs::create_dir_all(&nested).await.unwrap();
        async_fs::write(root.join("AGENTS.md"), "project = lantern")
            .await
            .unwrap();
        async_fs::write(root.join("nested/AGENTS.md"), "team = orbit")
            .await
            .unwrap();

        let items = AgentsMd::discover_all(&nested).load().await.unwrap();
        assert_eq!(items.len(), 2);
        // The documented order is outermost to innermost.
        assert_eq!(context_path(&items[0]), Some(root.join("AGENTS.md")));
        assert_eq!(
            context_path(&items[1]),
            Some(root.join("nested").join("AGENTS.md"))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// A search directory outside the ancestor chain is still picked up.
    #[tokio::test]
    async fn loads_agents_from_explicit_search_paths() {
        let root = temp_path("agentkit-context-agents-explicit");
        let nested = root.join("nested/project");
        let shared = root.join("shared");
        async_fs::create_dir_all(&nested).await.unwrap();
        async_fs::create_dir_all(&shared).await.unwrap();
        async_fs::write(shared.join("AGENTS.md"), "policy = explicit")
            .await
            .unwrap();

        let items = AgentsMd::discover(&nested)
            .with_search_dir(&shared)
            .load()
            .await
            .unwrap();
        assert_eq!(items.len(), 1);
        // Compare path components rather than a `/`-separated string so the
        // assertion also holds where the separator is `\`.
        assert!(
            context_path(&items[0])
                .is_some_and(|path| path.ends_with(Path::new("shared").join("AGENTS.md")))
        );

        async_fs::remove_dir_all(&root).await.unwrap();
    }

    /// Builds a unique directory path under the system temp dir; the caller
    /// creates and removes it.
    fn temp_path(prefix: &str) -> PathBuf {
        let suffix = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!("{prefix}-{suffix}"))
    }
}