1use std::path::{Path, PathBuf};
16
17use globset::{Glob, GlobSetBuilder};
18use serde::{Serialize, Serializer};
19
20use crate::agents::AgentId;
21
22#[derive(Debug, Clone, Serialize)]
24pub struct AgentDoc {
25 pub agent: AgentId,
26 pub files: Vec<MatchedFile>,
27}
28
/// A single file that matched an agent pattern, held fully in memory.
///
/// `Serialize` is implemented by hand in this module (not derived) so the
/// path can be emitted with `/` separators.
#[derive(Debug, Clone)]
pub struct MatchedFile {
    /// Path as handed to the reader; in this module that is the path relative
    /// to the repository root.
    pub path: PathBuf,
    /// Length in bytes of the raw file contents.
    pub bytes: u64,
    /// File contents decoded as UTF-8.
    pub content: String,
}
41
42impl Serialize for MatchedFile {
43 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
44 where
45 S: Serializer,
46 {
47 use serde::ser::SerializeStruct;
48 let mut state = serializer.serialize_struct("MatchedFile", 3)?;
49 let path_str = self
50 .path
51 .to_string_lossy()
52 .replace(std::path::MAIN_SEPARATOR, "/");
53 state.serialize_field("path", &path_str)?;
54 state.serialize_field("bytes", &self.bytes)?;
55 state.serialize_field("content", &self.content)?;
56 state.end()
57 }
58}
59
60#[derive(Debug, Clone, Serialize)]
62pub struct RepoContext {
63 pub name: String,
64 pub path: PathBuf,
65 pub branch: Option<String>,
66 pub agent_docs: Vec<AgentDoc>,
67 pub warnings: Vec<String>,
68}
69
70#[derive(Debug, Clone, Serialize)]
76#[serde(tag = "kind", rename_all = "snake_case")]
77pub enum Scope {
78 All,
79 Workspace { name: String },
80 Repos { repos: Vec<String> },
81}
82
83#[derive(Debug, Clone, Serialize)]
86pub struct Context {
87 pub schema_version: u32,
88 pub generated_at: String,
89 pub agents: Vec<AgentId>,
90 pub scope: Scope,
91 pub repos: Vec<RepoContext>,
92 pub warnings: Vec<String>,
93}
94
/// Schema version number for the serialized `Context` envelope.
pub const SCHEMA_VERSION: u32 = 1;
98
99impl RepoContext {
100 #[must_use]
108 pub fn build_one(name: &str, repo_path: &Path, agents: &[AgentId]) -> Self {
109 let mut warnings: Vec<String> = Vec::new();
110
111 let canonical = match crate::path::canonicalize(repo_path) {
115 Ok(p) => p,
116 Err(e) => {
117 return Self {
118 name: name.to_string(),
119 path: repo_path.to_path_buf(),
120 branch: None,
121 agent_docs: Vec::new(),
122 warnings: vec![format!("path no longer accessible: {e}")],
123 };
124 }
125 };
126
127 let (branch, branch_warning) = read_branch(&canonical);
128 if let Some(w) = branch_warning {
129 warnings.push(w);
130 }
131
132 let (agent_docs, doc_warnings) = resolve_agent_docs(&canonical, agents);
133 warnings.extend(doc_warnings);
134
135 Self {
136 name: name.to_string(),
137 path: canonical,
138 branch,
139 agent_docs,
140 warnings,
141 }
142 }
143}
144
145fn read_branch(repo_path: &Path) -> (Option<String>, Option<String>) {
151 let repo = match git2::Repository::open(repo_path) {
152 Ok(r) => r,
153 Err(e) => {
154 return (
155 None,
156 Some(format!(
157 "no longer a git repository at {}: {}",
158 repo_path.display(),
159 e.message()
160 )),
161 );
162 }
163 };
164
165 if repo.is_bare() {
166 return (None, None);
167 }
168
169 match repo.head() {
170 Ok(head) if head.is_branch() => (head.shorthand().map(ToString::to_string), None),
171 Ok(_) => (None, None), Err(e) if e.code() == git2::ErrorCode::UnbornBranch => (None, None),
173 Err(e) => (None, Some(format!("could not read HEAD: {}", e.message()))),
174 }
175}
176
177#[must_use]
194pub fn resolve_agent_docs(repo_root: &Path, agents: &[AgentId]) -> (Vec<AgentDoc>, Vec<String>) {
195 let mut docs = Vec::with_capacity(agents.len());
196 let mut warnings = Vec::new();
197
198 for agent in agents {
199 let mut files = Vec::new();
200 let mut seen: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
201
202 for pattern in agent.file_patterns() {
203 match classify_pattern(pattern) {
204 PatternKind::Flat(relpath) => {
205 let abs = repo_root.join(&relpath);
206 if let Some(matched) = read_matched_file(&abs, &relpath, &mut warnings) {
207 if seen.insert(relpath.clone()) {
208 files.push(matched);
209 }
210 }
211 }
212 PatternKind::Glob { parent, pattern } => {
213 expand_glob(
214 repo_root,
215 &parent,
216 &pattern,
217 &mut files,
218 &mut seen,
219 &mut warnings,
220 );
221 }
222 }
223 }
224
225 files.sort_by(|a, b| a.path.cmp(&b.path));
226 docs.push(AgentDoc {
227 agent: *agent,
228 files,
229 });
230 }
231
232 (docs, warnings)
233}
234
/// How an agent file pattern is resolved.
enum PatternKind {
    /// Pattern without glob metacharacters: a literal relative path.
    Flat(PathBuf),
    /// Pattern with glob metacharacters: `parent` is the directory part and
    /// `pattern` is the final path component to match inside it.
    Glob { parent: PathBuf, pattern: String },
}
245
246fn classify_pattern(pattern: &'static str) -> PatternKind {
247 let has_glob = pattern.contains(['*', '?', '[']);
248 if !has_glob {
249 return PatternKind::Flat(PathBuf::from(pattern));
250 }
251 let (parent, leaf) = pattern.rsplit_once('/').map_or_else(
255 || (PathBuf::from("."), pattern.to_string()),
256 |(p, l)| (PathBuf::from(p), l.to_string()),
257 );
258 PatternKind::Glob {
259 parent,
260 pattern: leaf,
261 }
262}
263
264fn expand_glob(
265 repo_root: &Path,
266 parent_rel: &Path,
267 pattern: &str,
268 files: &mut Vec<MatchedFile>,
269 seen: &mut std::collections::BTreeSet<PathBuf>,
270 warnings: &mut Vec<String>,
271) {
272 let parent_abs = repo_root.join(parent_rel);
273 let entries = match fs_err::read_dir(&parent_abs) {
274 Ok(it) => it,
275 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return,
276 Err(e) => {
277 warnings.push(format!("could not list {}: {e}", display_rel(parent_rel)));
278 return;
279 }
280 };
281
282 let glob = match Glob::new(pattern) {
283 Ok(g) => g,
284 Err(e) => {
285 warnings.push(format!("invalid glob '{pattern}': {e}"));
286 return;
287 }
288 };
289 let mut builder = GlobSetBuilder::new();
290 builder.add(glob);
291 let set = match builder.build() {
292 Ok(s) => s,
293 Err(e) => {
294 warnings.push(format!("could not compile glob '{pattern}': {e}"));
295 return;
296 }
297 };
298
299 for entry in entries.flatten() {
300 let Ok(file_type) = entry.file_type() else {
301 continue;
302 };
303 if file_type.is_dir() {
304 continue;
305 }
306 let file_name = entry.file_name();
307 if !set.is_match(&file_name) {
308 continue;
309 }
310 let rel = parent_rel.join(&file_name);
311 let abs = entry.path();
312 if let Some(matched) = read_matched_file(&abs, &rel, warnings) {
313 if seen.insert(rel) {
314 files.push(matched);
315 }
316 }
317 }
318}
319
320fn read_matched_file(
321 abs: &Path,
322 relpath: &Path,
323 warnings: &mut Vec<String>,
324) -> Option<MatchedFile> {
325 let bytes = match fs_err::read(abs) {
326 Ok(b) => b,
327 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return None,
328 Err(e) => {
329 warnings.push(format!("{}: {e}", display_rel(relpath)));
330 return None;
331 }
332 };
333 let len = bytes.len() as u64;
334 let Ok(content) = String::from_utf8(bytes) else {
335 warnings.push(format!(
336 "{}: file is not valid UTF-8, skipped",
337 display_rel(relpath)
338 ));
339 return None;
340 };
341 Some(MatchedFile {
342 path: relpath.to_path_buf(),
343 bytes: len,
344 content,
345 })
346}
347
/// Renders `path` as text (lossily for non-UTF-8 names), writing the
/// platform's main separator as `/`.
fn display_rel(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy
        .chars()
        .map(|ch| {
            if ch == std::path::MAIN_SEPARATOR {
                '/'
            } else {
                ch
            }
        })
        .collect()
}
352
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used, clippy::expect_used)]
    use super::*;
    use tempfile::TempDir;

    /// Creates `parent/name`, initialises a git repository there with a
    /// single empty-tree commit on HEAD, and returns the canonicalized path.
    fn init_repo(parent: &Path, name: &str) -> PathBuf {
        let dir = parent.join(name);
        std::fs::create_dir_all(&dir).unwrap();
        {
            // Scoped so the repository handle is released before returning.
            let git = git2::Repository::init(&dir).unwrap();
            let author = git2::Signature::now("T", "t@e").unwrap();
            let tree_id = git.index().unwrap().write_tree().unwrap();
            let tree = git.find_tree(tree_id).unwrap();
            git.commit(Some("HEAD"), &author, &author, "init", &tree, &[])
                .unwrap();
        }
        crate::path::canonicalize(&dir).unwrap()
    }

    /// Fresh temporary directory holding a repository named `r`.
    ///
    /// The returned `TempDir` guard must stay bound for as long as the path
    /// is used; dropping it removes the directory.
    fn scratch_repo() -> (TempDir, PathBuf) {
        let guard = TempDir::new().unwrap();
        let root = init_repo(guard.path(), "r");
        (guard, root)
    }

    /// Relative paths of every file in `doc`, in stored order.
    fn paths_of(doc: &AgentDoc) -> Vec<PathBuf> {
        doc.files.iter().map(|f| f.path.clone()).collect()
    }

    // Classification must agree with the presence of glob metacharacters for
    // every pattern of every known agent.
    #[test]
    fn classify_every_v1_pattern() {
        for agent in AgentId::all() {
            for pattern in agent.file_patterns() {
                let is_glob = matches!(classify_pattern(pattern), PatternKind::Glob { .. });
                let has_meta = pattern.contains(['*', '?', '[']);
                if has_meta && !is_glob {
                    panic!("pattern {pattern:?} has glob meta but classified Flat");
                }
                if !has_meta && is_glob {
                    panic!("pattern {pattern:?} has no glob meta but classified Glob");
                }
            }
        }
    }

    #[test]
    fn classify_flat_pattern_keeps_subdir_prefix() {
        let PatternKind::Flat(p) = classify_pattern(".github/copilot-instructions.md") else {
            panic!("expected Flat");
        };
        assert_eq!(p, PathBuf::from(".github/copilot-instructions.md"));
    }

    #[test]
    fn classify_glob_pattern_splits_parent_and_leaf() {
        let PatternKind::Glob { parent, pattern } = classify_pattern(".cursor/rules/*.md") else {
            panic!("expected Glob");
        };
        assert_eq!(parent, PathBuf::from(".cursor/rules"));
        assert_eq!(pattern, "*.md");
    }

    // A repository with no agent files still yields one (empty) entry per
    // requested agent and no warnings.
    #[test]
    fn empty_repo_yields_empty_files_per_agent() {
        let (_guard, root) = scratch_repo();
        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::ClaudeCode, AgentId::Cursor]);
        assert_eq!(docs.len(), 2);
        assert!(docs[0].files.is_empty());
        assert!(docs[1].files.is_empty());
        assert!(warnings.is_empty());
    }

    #[test]
    fn flat_pattern_resolves_to_root_level_file() {
        let (_guard, root) = scratch_repo();
        std::fs::write(root.join("CLAUDE.md"), "hello\n").unwrap();

        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::ClaudeCode]);
        assert!(warnings.is_empty());
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].files.len(), 1);

        let file = &docs[0].files[0];
        assert_eq!(file.path, PathBuf::from("CLAUDE.md"));
        assert_eq!(file.content, "hello\n");
        assert_eq!(file.bytes, 6);
    }

    #[test]
    fn glob_pattern_expands_to_multiple_files_under_known_dir() {
        let (_guard, root) = scratch_repo();
        let rules_dir = root.join(".cursor/rules");
        std::fs::create_dir_all(&rules_dir).unwrap();
        std::fs::write(rules_dir.join("style.md"), "s").unwrap();
        std::fs::write(rules_dir.join("tests.md"), "t").unwrap();

        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::Cursor]);
        assert!(warnings.is_empty());
        assert_eq!(docs.len(), 1);
        assert_eq!(
            paths_of(&docs[0]),
            vec![
                PathBuf::from(".cursor/rules/style.md"),
                PathBuf::from(".cursor/rules/tests.md"),
            ],
            "files sorted by relative path"
        );
    }

    #[test]
    fn mixed_flat_and_glob_under_same_agent() {
        let (_guard, root) = scratch_repo();
        std::fs::create_dir_all(root.join(".cursor/rules")).unwrap();
        std::fs::write(root.join(".cursor/rules/style.md"), "s").unwrap();
        std::fs::write(root.join(".cursorrules"), "legacy").unwrap();

        let (docs, _) = resolve_agent_docs(&root, &[AgentId::Cursor]);
        assert_eq!(
            paths_of(&docs[0]),
            vec![
                PathBuf::from(".cursor/rules/style.md"),
                PathBuf::from(".cursorrules"),
            ]
        );
    }

    // Flat patterns are resolved at the given relative path only; the same
    // file name deeper in the tree must not match.
    #[test]
    fn nested_claude_md_does_not_match() {
        let (_guard, root) = scratch_repo();
        std::fs::create_dir_all(root.join("docs")).unwrap();
        std::fs::write(root.join("docs/CLAUDE.md"), "nested").unwrap();

        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::ClaudeCode]);
        assert!(docs[0].files.is_empty(), "no recursive walk");
        assert!(warnings.is_empty(), "missing root file is not a warning");
    }

    #[test]
    fn deep_node_modules_is_not_walked() {
        let (_guard, root) = scratch_repo();
        let buried = root.join("node_modules/some-pkg/internals/CLAUDE.md");
        std::fs::create_dir_all(buried.parent().unwrap()).unwrap();
        std::fs::write(&buried, "noise").unwrap();

        let (docs, _) = resolve_agent_docs(&root, &[AgentId::ClaudeCode]);
        assert!(docs[0].files.is_empty());
    }

    #[test]
    fn non_utf8_file_is_skipped_with_warning() {
        let (_guard, root) = scratch_repo();
        // 0xFF 0xFE is not a valid UTF-8 sequence.
        std::fs::write(root.join(".cursorrules"), [0xFF, 0xFE]).unwrap();

        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::Cursor]);
        assert!(docs[0].files.is_empty());
        assert_eq!(warnings.len(), 1);
        assert!(
            warnings[0].contains(".cursorrules") && warnings[0].contains("UTF-8"),
            "warning names file and reason, got: {warnings:?}"
        );
    }

    #[test]
    fn copilot_pattern_resolves_under_dot_github() {
        let (_guard, root) = scratch_repo();
        std::fs::create_dir_all(root.join(".github")).unwrap();
        std::fs::write(root.join(".github/copilot-instructions.md"), "x").unwrap();

        let (docs, warnings) = resolve_agent_docs(&root, &[AgentId::Copilot]);
        assert!(warnings.is_empty());
        assert_eq!(docs[0].files.len(), 1);
        assert_eq!(
            docs[0].files[0].path,
            PathBuf::from(".github/copilot-instructions.md")
        );
    }

    #[test]
    fn build_one_populates_branch_and_files() {
        let (_guard, root) = scratch_repo();
        std::fs::write(root.join("CLAUDE.md"), "ctx\n").unwrap();

        let built = RepoContext::build_one("r", &root, &[AgentId::ClaudeCode]);
        assert_eq!(built.name, "r");
        assert_eq!(built.path, root);
        assert!(built.branch.is_some(), "branch resolved from HEAD");
        assert_eq!(built.agent_docs.len(), 1);
        assert_eq!(built.agent_docs[0].files.len(), 1);
        assert!(built.warnings.is_empty());
    }

    #[test]
    fn build_one_missing_path_produces_placeholder_with_warning() {
        let guard = TempDir::new().unwrap();
        let ghost = guard.path().join("ghost");

        let built = RepoContext::build_one("ghost", &ghost, &[AgentId::ClaudeCode]);
        assert_eq!(built.branch, None);
        assert!(built.agent_docs.is_empty());
        assert_eq!(built.warnings.len(), 1);

        let message = built.warnings[0].to_lowercase();
        assert!(message.contains("accessible") || message.contains("no such"));
    }

    #[test]
    fn build_one_detached_head_yields_null_branch_no_warning() {
        let (_guard, root) = scratch_repo();

        // Point HEAD directly at the current commit instead of a branch.
        let git = git2::Repository::open(&root).unwrap();
        let head_commit = git.head().unwrap().peel_to_commit().unwrap().id();
        git.set_head_detached(head_commit).unwrap();
        drop(git);

        let built = RepoContext::build_one("r", &root, &[]);
        assert_eq!(built.branch, None);
        assert!(built.warnings.is_empty(), "detached is not a warning state");
    }

    #[test]
    fn build_one_preserves_agent_order_from_input() {
        let (_guard, root) = scratch_repo();
        std::fs::write(root.join("CLAUDE.md"), "c").unwrap();
        std::fs::write(root.join("AGENTS.md"), "a").unwrap();

        let requested = [AgentId::AgentsMd, AgentId::ClaudeCode];
        let built = RepoContext::build_one("r", &root, &requested);
        assert_eq!(built.agent_docs[0].agent, AgentId::AgentsMd);
        assert_eq!(built.agent_docs[1].agent, AgentId::ClaudeCode);
    }

    // Each variant serializes as an object tagged with a snake_case `kind`.
    #[test]
    fn scope_serializes_with_tagged_kind() {
        let all = serde_json::to_value(Scope::All).unwrap();
        assert_eq!(all, serde_json::json!({ "kind": "all" }));

        let workspace = serde_json::to_value(Scope::Workspace {
            name: "team".into(),
        })
        .unwrap();
        assert_eq!(
            workspace,
            serde_json::json!({ "kind": "workspace", "name": "team" })
        );

        let repos = serde_json::to_value(Scope::Repos {
            repos: vec!["a".into(), "b".into()],
        })
        .unwrap();
        assert_eq!(
            repos,
            serde_json::json!({ "kind": "repos", "repos": ["a", "b"] })
        );
    }

    #[test]
    fn matched_file_path_serializes_with_forward_slashes() {
        // Built with `join` so the platform's own separator ends up in the path.
        let file = MatchedFile {
            path: PathBuf::from(".cursor").join("rules").join("a.md"),
            bytes: 1,
            content: "x".into(),
        };
        let json = serde_json::to_value(&file).unwrap();
        assert_eq!(json["path"], ".cursor/rules/a.md");
        assert_eq!(json["bytes"], 1);
        assert_eq!(json["content"], "x");
    }

    #[test]
    fn full_context_envelope_serializes_with_documented_keys() {
        let file = MatchedFile {
            path: PathBuf::from("CLAUDE.md"),
            bytes: 5,
            content: "hello".into(),
        };
        let doc = AgentDoc {
            agent: AgentId::ClaudeCode,
            files: vec![file],
        };
        let repo = RepoContext {
            name: "r".into(),
            path: PathBuf::from("/tmp/r"),
            branch: Some("main".into()),
            agent_docs: vec![doc],
            warnings: vec![],
        };
        let context = Context {
            schema_version: SCHEMA_VERSION,
            generated_at: "2026-05-24T00:00:00Z".into(),
            agents: vec![AgentId::ClaudeCode],
            scope: Scope::All,
            repos: vec![repo],
            warnings: vec![],
        };

        let json = serde_json::to_value(&context).unwrap();
        assert_eq!(json["schema_version"], 1);
        assert_eq!(json["generated_at"], "2026-05-24T00:00:00Z");
        assert_eq!(json["agents"][0], "claude-code");
        assert_eq!(json["scope"]["kind"], "all");
        assert_eq!(json["repos"][0]["name"], "r");
        assert_eq!(json["repos"][0]["branch"], "main");
        assert_eq!(json["repos"][0]["agent_docs"][0]["agent"], "claude-code");
        assert_eq!(
            json["repos"][0]["agent_docs"][0]["files"][0]["path"],
            "CLAUDE.md"
        );
        assert!(json["warnings"].is_array() && json["warnings"].as_array().unwrap().is_empty());
    }
}