1use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use sha2::{Digest, Sha256};
22use std::collections::HashMap;
23use std::fs;
24use std::path::{Path, PathBuf};
25use std::time::SystemTime;
26
27use crate::error::Result;
28use crate::types::{Memory, MemoryScope, MemoryTier, MemoryType, Visibility};
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct ProjectContextConfig {
37 pub enabled: bool,
39 pub max_file_size: u64,
41 pub extract_sections: bool,
43 pub scan_parents: bool,
45 pub ignore_dirs: Vec<String>,
47 pub ignore_files: Vec<String>,
49 pub default_visibility: Visibility,
51 pub search_boost: f32,
53}
54
55impl Default for ProjectContextConfig {
56 fn default() -> Self {
57 Self {
58 enabled: true,
59 max_file_size: 1024 * 1024, extract_sections: true,
61 scan_parents: false,
62 ignore_dirs: vec![
63 ".git".to_string(),
64 "target".to_string(),
65 "node_modules".to_string(),
66 "vendor".to_string(),
67 ".venv".to_string(),
68 "__pycache__".to_string(),
69 "dist".to_string(),
70 "build".to_string(),
71 ],
72 ignore_files: vec![
73 ".env*".to_string(),
74 "*.key".to_string(),
75 "*.pem".to_string(),
76 "*.p12".to_string(),
77 "secrets/*".to_string(),
78 ],
79 default_visibility: Visibility::Private,
80 search_boost: 0.2,
81 }
82 }
83}
84
/// Paths, relative to a scanned directory, that are probed for AI-instruction
/// files. The scan visits them in this order, so the order also determines the
/// order of discovered files within one directory.
pub const CORE_INSTRUCTION_FILES: &[&str] = &[
    "CLAUDE.md",
    "AGENTS.md",
    ".cursorrules",
    ".github/copilot-instructions.md",
    ".aider.conf.yml",
    "GEMINI.md",
    ".windsurfrules",
    "CONVENTIONS.md",
    "CODING_GUIDELINES.md",
];
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
108#[serde(rename_all = "snake_case")]
109pub enum InstructionFileType {
110 ClaudeMd,
111 AgentsMd,
112 CursorRules,
113 CopilotInstructions,
114 GeminiMd,
115 AiderConf,
116 ConventionsMd,
117 WindsurfRules,
118 CodingGuidelines,
119 Custom,
120}
121
122impl InstructionFileType {
123 pub fn from_filename(filename: &str) -> Self {
125 match filename.to_lowercase().as_str() {
126 "claude.md" => Self::ClaudeMd,
127 "agents.md" => Self::AgentsMd,
128 ".cursorrules" => Self::CursorRules,
129 "copilot-instructions.md" => Self::CopilotInstructions,
130 "gemini.md" => Self::GeminiMd,
131 ".aider.conf.yml" => Self::AiderConf,
132 "conventions.md" => Self::ConventionsMd,
133 ".windsurfrules" => Self::WindsurfRules,
134 "coding_guidelines.md" | "coding-guidelines.md" => Self::CodingGuidelines,
135 _ => Self::Custom,
136 }
137 }
138
139 pub fn as_tag(&self) -> &'static str {
141 match self {
142 Self::ClaudeMd => "claude-md",
143 Self::AgentsMd => "agents-md",
144 Self::CursorRules => "cursorrules",
145 Self::CopilotInstructions => "copilot-instructions",
146 Self::GeminiMd => "gemini-md",
147 Self::AiderConf => "aider-conf",
148 Self::ConventionsMd => "conventions-md",
149 Self::WindsurfRules => "windsurfrules",
150 Self::CodingGuidelines => "coding-guidelines",
151 Self::Custom => "custom-instructions",
152 }
153 }
154}
155
/// Syntax family of an instruction file; selects which parser handles it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileFormat {
    Markdown,
    Yaml,
    PlainText,
}

impl FileFormat {
    /// Picks the format from the file name's suffix, ignoring case:
    /// `.md` is Markdown, `.yml`/`.yaml` is YAML, anything else is plain text.
    pub fn from_filename(filename: &str) -> Self {
        let lowered = filename.to_lowercase();
        let has_suffix = |suffix: &str| lowered.ends_with(suffix);

        if has_suffix(".md") {
            return Self::Markdown;
        }
        if has_suffix(".yml") || has_suffix(".yaml") {
            return Self::Yaml;
        }
        Self::PlainText
    }

    /// Lower-case name of the format.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Markdown => "markdown",
            Self::Yaml => "yaml",
            Self::PlainText => "plaintext",
        }
    }
}
186
187#[derive(Debug, Clone)]
189pub struct DiscoveredFile {
190 pub path: PathBuf,
192 pub filename: String,
194 pub size: u64,
196 pub content: String,
198 pub file_type: InstructionFileType,
200 pub format: FileFormat,
202 pub content_hash: String,
204 pub mtime: SystemTime,
206 pub project_path: PathBuf,
208}
209
210#[derive(Debug, Clone)]
212pub struct ParsedInstructions {
213 pub sections: Vec<ParsedSection>,
215 pub raw_content: String,
217 pub file_hash: String,
219}
220
/// One section of a parsed instruction file.
#[derive(Clone, Debug)]
pub struct ParsedSection {
    /// Heading text (or a fixed title for formats without headings).
    pub title: String,
    /// Body text of the section.
    pub content: String,
    /// Titles of the enclosing headings and this one, joined with `" > "`.
    pub section_path: String,
    /// Zero-based position among the sections emitted for the file.
    pub section_index: usize,
    /// Heading depth (1 for a top-level heading).
    pub heading_level: usize,
    /// URL-style slug derived from the title.
    pub heading_anchor: String,
    /// `sha256:`-prefixed hex digest of the section body.
    pub content_hash: String,
}
239
240#[derive(Debug, Clone, Serialize, Deserialize)]
242pub struct ScanResult {
243 pub project_path: String,
245 pub files_found: usize,
247 pub memories_created: usize,
249 pub memories_updated: usize,
251 pub files_skipped: usize,
253 pub errors: Vec<String>,
255 pub scanned_at: DateTime<Utc>,
257}
258
259pub trait InstructionFileParser: Send + Sync {
265 fn parse(&self, content: &str) -> Result<ParsedInstructions>;
267}
268
269pub struct MarkdownParser;
271
272impl InstructionFileParser for MarkdownParser {
273 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
274 let file_hash = hash_content(content);
275 let mut sections = Vec::new();
276 let mut current_section: Option<(String, String, usize, Vec<String>)> = None;
277 let mut section_index = 0;
278 let mut heading_stack: Vec<(usize, String)> = Vec::new();
279
280 for line in content.lines() {
281 if let Some((level, title)) = parse_markdown_heading(line) {
282 if let Some((title, content, level, path_parts)) = current_section.take() {
284 if !content.trim().is_empty() {
285 let section_path = path_parts.join(" > ");
286 sections.push(ParsedSection {
287 title: title.clone(),
288 content: content.trim().to_string(),
289 section_path,
290 section_index,
291 heading_level: level,
292 heading_anchor: slugify(&title),
293 content_hash: hash_content(&content),
294 });
295 section_index += 1;
296 }
297 }
298
299 while heading_stack
301 .last()
302 .map(|(l, _)| *l >= level)
303 .unwrap_or(false)
304 {
305 heading_stack.pop();
306 }
307 heading_stack.push((level, title.clone()));
308
309 let path_parts: Vec<String> =
311 heading_stack.iter().map(|(_, t)| t.clone()).collect();
312
313 current_section = Some((title, String::new(), level, path_parts));
314 } else if let Some((_, ref mut content, _, _)) = current_section {
315 content.push_str(line);
316 content.push('\n');
317 }
318 }
319
320 if let Some((title, content, level, path_parts)) = current_section {
322 if !content.trim().is_empty() {
323 let section_path = path_parts.join(" > ");
324 sections.push(ParsedSection {
325 title: title.clone(),
326 content: content.trim().to_string(),
327 section_path,
328 section_index,
329 heading_level: level,
330 heading_anchor: slugify(&title),
331 content_hash: hash_content(&content),
332 });
333 }
334 }
335
336 Ok(ParsedInstructions {
337 sections,
338 raw_content: content.to_string(),
339 file_hash,
340 })
341 }
342}
343
344pub struct YamlParser;
346
347impl InstructionFileParser for YamlParser {
348 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
349 let file_hash = hash_content(content);
350
351 let sections = vec![ParsedSection {
354 title: "Configuration".to_string(),
355 content: content.to_string(),
356 section_path: "Configuration".to_string(),
357 section_index: 0,
358 heading_level: 1,
359 heading_anchor: "configuration".to_string(),
360 content_hash: file_hash.clone(),
361 }];
362
363 Ok(ParsedInstructions {
364 sections,
365 raw_content: content.to_string(),
366 file_hash,
367 })
368 }
369}
370
371pub struct PlainTextParser;
373
374impl InstructionFileParser for PlainTextParser {
375 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
376 let file_hash = hash_content(content);
377
378 let sections = vec![ParsedSection {
379 title: "Instructions".to_string(),
380 content: content.to_string(),
381 section_path: "Instructions".to_string(),
382 section_index: 0,
383 heading_level: 1,
384 heading_anchor: "instructions".to_string(),
385 content_hash: file_hash.clone(),
386 }];
387
388 Ok(ParsedInstructions {
389 sections,
390 raw_content: content.to_string(),
391 file_hash,
392 })
393 }
394}
395
396pub struct ProjectContextEngine {
402 config: ProjectContextConfig,
403 markdown_parser: MarkdownParser,
404 yaml_parser: YamlParser,
405 plaintext_parser: PlainTextParser,
406}
407
408impl ProjectContextEngine {
409 pub fn new() -> Self {
411 Self::with_config(ProjectContextConfig::default())
412 }
413
414 pub fn with_config(config: ProjectContextConfig) -> Self {
416 Self {
417 config,
418 markdown_parser: MarkdownParser,
419 yaml_parser: YamlParser,
420 plaintext_parser: PlainTextParser,
421 }
422 }
423
424 fn get_parser(&self, format: FileFormat) -> &dyn InstructionFileParser {
426 match format {
427 FileFormat::Markdown => &self.markdown_parser,
428 FileFormat::Yaml => &self.yaml_parser,
429 FileFormat::PlainText => &self.plaintext_parser,
430 }
431 }
432
433 pub fn scan_directory(&self, path: &Path) -> Result<Vec<DiscoveredFile>> {
437 let (files, _skipped) = self.scan_directory_with_stats(path)?;
438 Ok(files)
439 }
440
441 pub fn scan_directory_with_stats(&self, path: &Path) -> Result<(Vec<DiscoveredFile>, usize)> {
444 if !self.config.enabled {
445 return Ok((Vec::new(), 0));
446 }
447
448 let mut discovered = Vec::new();
449 let mut skipped = 0;
450 let project_path = path.to_path_buf();
451
452 for pattern in CORE_INSTRUCTION_FILES {
454 let file_path = path.join(pattern);
455 if file_path.exists() && file_path.is_file() {
456 match self.read_file(&file_path, &project_path) {
457 Ok(Some(file)) => discovered.push(file),
458 Ok(None) => skipped += 1, Err(e) => {
460 tracing::warn!("Error reading {}: {}", file_path.display(), e);
461 }
462 }
463 }
464 }
465
466 if self.config.scan_parents {
468 if let Some(parent) = path.parent() {
469 if parent != path {
470 let (parent_files, parent_skipped) = self.scan_directory_with_stats(parent)?;
471 discovered.extend(parent_files);
472 skipped += parent_skipped;
473 }
474 }
475 }
476
477 Ok((discovered, skipped))
478 }
479
480 fn read_file(&self, path: &Path, project_path: &Path) -> Result<Option<DiscoveredFile>> {
482 let metadata = fs::metadata(path)?;
483 let size = metadata.len();
484
485 if size > self.config.max_file_size {
487 tracing::info!(
488 "Skipping {} (size {} > max {})",
489 path.display(),
490 size,
491 self.config.max_file_size
492 );
493 return Ok(None);
494 }
495
496 let content = fs::read_to_string(path)?;
497 let filename = path
498 .file_name()
499 .and_then(|n| n.to_str())
500 .unwrap_or("unknown")
501 .to_string();
502
503 let file_type = InstructionFileType::from_filename(&filename);
504 let format = FileFormat::from_filename(&filename);
505 let content_hash = hash_content(&content);
506 let mtime = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
507
508 Ok(Some(DiscoveredFile {
509 path: path.to_path_buf(),
510 filename,
511 size,
512 content,
513 file_type,
514 format,
515 content_hash,
516 mtime,
517 project_path: project_path.to_path_buf(),
518 }))
519 }
520
521 pub fn parse_file(&self, file: &DiscoveredFile) -> Result<ParsedInstructions> {
523 let parser = self.get_parser(file.format);
524 parser.parse(&file.content)
525 }
526
527 pub fn file_to_memory(&self, file: &DiscoveredFile) -> Memory {
529 let mut metadata = HashMap::new();
530 metadata.insert(
531 "source_file".to_string(),
532 serde_json::Value::String(file.path.to_string_lossy().to_string()),
533 );
534 metadata.insert(
535 "file_type".to_string(),
536 serde_json::Value::String(file.file_type.as_tag().to_string()),
537 );
538 metadata.insert(
539 "project_path".to_string(),
540 serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
541 );
542 metadata.insert(
543 "file_hash".to_string(),
544 serde_json::Value::String(file.content_hash.clone()),
545 );
546 let mtime_rfc3339 = file
548 .mtime
549 .duration_since(std::time::UNIX_EPOCH)
550 .map(|d| DateTime::<Utc>::from(std::time::UNIX_EPOCH + d).to_rfc3339())
551 .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string());
552 metadata.insert(
553 "file_mtime".to_string(),
554 serde_json::Value::String(mtime_rfc3339),
555 );
556
557 Memory {
558 id: 0,
559 content: file.content.clone(),
560 memory_type: MemoryType::Context,
561 tags: vec![
562 "project-context".to_string(),
563 file.file_type.as_tag().to_string(),
564 ],
565 metadata,
566 importance: 0.8, access_count: 0,
568 created_at: Utc::now(),
569 updated_at: Utc::now(),
570 last_accessed_at: None,
571 owner_id: None,
572 visibility: self.config.default_visibility,
573 scope: MemoryScope::Global,
574 workspace: "default".to_string(),
575 tier: MemoryTier::Permanent,
576 version: 1,
577 has_embedding: false,
578 expires_at: None,
579 content_hash: None, event_time: None,
581 event_duration_seconds: None,
582 trigger_pattern: None,
583 procedure_success_count: 0,
584 procedure_failure_count: 0,
585 summary_of_id: None,
586 lifecycle_state: crate::types::LifecycleState::Active,
587 media_url: None,
588 }
589 }
590
591 pub fn section_to_memory(
593 &self,
594 section: &ParsedSection,
595 file: &DiscoveredFile,
596 parent_id: i64,
597 ) -> Memory {
598 let mut metadata = HashMap::new();
599 metadata.insert(
600 "source_file".to_string(),
601 serde_json::Value::String(file.path.to_string_lossy().to_string()),
602 );
603 metadata.insert(
604 "file_type".to_string(),
605 serde_json::Value::String(file.file_type.as_tag().to_string()),
606 );
607 metadata.insert(
608 "project_path".to_string(),
609 serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
610 );
611 metadata.insert(
612 "section_path".to_string(),
613 serde_json::Value::String(section.section_path.clone()),
614 );
615 metadata.insert(
616 "section_index".to_string(),
617 serde_json::json!(section.section_index),
618 );
619 metadata.insert(
620 "content_hash".to_string(),
621 serde_json::Value::String(section.content_hash.clone()),
622 );
623 metadata.insert(
624 "heading_anchor".to_string(),
625 serde_json::Value::String(section.heading_anchor.clone()),
626 );
627 metadata.insert("parent_memory_id".to_string(), serde_json::json!(parent_id));
628
629 let content = format!("# {}\n\n{}", section.title, section.content);
631
632 Memory {
633 id: 0,
634 content,
635 memory_type: MemoryType::Context,
636 tags: vec![
637 "project-context".to_string(),
638 "section".to_string(),
639 file.file_type.as_tag().to_string(),
640 ],
641 metadata,
642 importance: 0.7,
643 access_count: 0,
644 created_at: Utc::now(),
645 updated_at: Utc::now(),
646 last_accessed_at: None,
647 owner_id: None,
648 visibility: self.config.default_visibility,
649 scope: MemoryScope::Global,
650 workspace: "default".to_string(),
651 tier: MemoryTier::Permanent,
652 version: 1,
653 has_embedding: false,
654 expires_at: None,
655 content_hash: None, event_time: None,
657 event_duration_seconds: None,
658 trigger_pattern: None,
659 procedure_success_count: 0,
660 procedure_failure_count: 0,
661 summary_of_id: None,
662 lifecycle_state: crate::types::LifecycleState::Active,
663 media_url: None,
664 }
665 }
666
667 pub fn config(&self) -> &ProjectContextConfig {
669 &self.config
670 }
671}
672
673impl Default for ProjectContextEngine {
674 fn default() -> Self {
675 Self::new()
676 }
677}
678
679fn hash_content(content: &str) -> String {
685 let mut hasher = Sha256::new();
686 hasher.update(content.as_bytes());
687 format!("sha256:{}", hex::encode(hasher.finalize()))
688}
689
/// Parses an ATX-style Markdown heading.
///
/// Returns the heading level (1 to 6) and the trimmed title when `line`, after
/// optional leading whitespace, starts with one to six `#` characters followed
/// by whitespace and a non-empty title. Returns `None` otherwise.
///
/// Requiring whitespace after the `#` run keeps lines such as `#hashtag`,
/// `#!/bin/sh` or `#include <x>` from being mistaken for headings.
fn parse_markdown_heading(line: &str) -> Option<(usize, String)> {
    let trimmed = line.trim_start();

    let level = trimmed.chars().take_while(|&c| c == '#').count();
    if level == 0 || level > 6 {
        return None;
    }

    // `#` is a single byte, so `level` is also a valid byte offset.
    let rest = &trimmed[level..];
    if !rest.starts_with(char::is_whitespace) {
        // Either nothing follows the hashes or they run straight into text.
        return None;
    }

    let title = rest.trim();
    if title.is_empty() {
        return None;
    }

    Some((level, title.to_string()))
}
709
/// Converts a heading title into an anchor slug: lower-cased alphanumeric
/// runs joined by single hyphens, with no leading or trailing hyphen.
fn slugify(title: &str) -> String {
    let lowered = title.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());

    // Every non-alphanumeric character is a separator; empty pieces between
    // consecutive separators are skipped so hyphens never repeat.
    let words = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty());

    for word in words {
        if !slug.is_empty() {
            slug.push('-');
        }
        slug.push_str(word);
    }

    slug
}
722
/// Unit tests for file-type detection, the parsers, and memory conversion.
#[cfg(test)]
mod tests {
    use super::*;

    /// File names map to the expected instruction-file type, case-insensitively.
    #[test]
    fn test_instruction_file_type_detection() {
        assert_eq!(
            InstructionFileType::from_filename("CLAUDE.md"),
            InstructionFileType::ClaudeMd
        );
        assert_eq!(
            InstructionFileType::from_filename(".cursorrules"),
            InstructionFileType::CursorRules
        );
        assert_eq!(
            InstructionFileType::from_filename(".aider.conf.yml"),
            InstructionFileType::AiderConf
        );
        assert_eq!(
            InstructionFileType::from_filename("random.txt"),
            InstructionFileType::Custom
        );
    }

    /// The file suffix selects the format.
    #[test]
    fn test_file_format_detection() {
        assert_eq!(FileFormat::from_filename("CLAUDE.md"), FileFormat::Markdown);
        assert_eq!(
            FileFormat::from_filename(".aider.conf.yml"),
            FileFormat::Yaml
        );
        assert_eq!(
            FileFormat::from_filename(".cursorrules"),
            FileFormat::PlainText
        );
    }

    /// Headings yield their level and title; non-headings yield `None`.
    #[test]
    fn test_markdown_heading_parsing() {
        assert_eq!(
            parse_markdown_heading("# Title"),
            Some((1, "Title".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("## Subtitle"),
            Some((2, "Subtitle".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("### Deep Heading"),
            Some((3, "Deep Heading".to_string()))
        );
        assert_eq!(parse_markdown_heading("Not a heading"), None);
        // A bare `#` has no title and is not a heading.
        assert_eq!(parse_markdown_heading("#"), None);
    }

    /// Slugs are lower-case, hyphen-separated, and free of punctuation.
    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("Unit Testing"), "unit-testing");
        assert_eq!(slugify("API & REST"), "api-rest");
        assert_eq!(slugify("  Multiple   Spaces  "), "multiple-spaces");
    }

    /// Hashing is deterministic, content-sensitive, and prefixed.
    #[test]
    fn test_hash_content() {
        let hash1 = hash_content("hello");
        let hash2 = hash_content("hello");
        let hash3 = hash_content("world");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert!(hash1.starts_with("sha256:"));
    }

    /// Sections follow the headings and carry their ancestor path.
    #[test]
    fn test_markdown_parser() {
        let content = r#"# Main Title

Some intro text.

## Section One

Content of section one.

## Section Two

Content of section two.

### Subsection

Nested content.
"#;

        let parser = MarkdownParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 4);
        assert_eq!(result.sections[0].title, "Main Title");
        assert_eq!(result.sections[0].section_path, "Main Title");
        assert_eq!(result.sections[1].title, "Section One");
        assert_eq!(result.sections[1].section_path, "Main Title > Section One");
        assert_eq!(result.sections[2].title, "Section Two");
        assert_eq!(result.sections[3].title, "Subsection");
        assert_eq!(
            result.sections[3].section_path,
            "Main Title > Section Two > Subsection"
        );
    }

    /// YAML files become a single "Configuration" section.
    #[test]
    fn test_yaml_parser() {
        let content = "key: value\nother: data";
        let parser = YamlParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Configuration");
    }

    /// Plain-text files become a single "Instructions" section.
    #[test]
    fn test_plaintext_parser() {
        let content = "Some plain text instructions";
        let parser = PlainTextParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Instructions");
    }

    /// A new engine carries the default configuration.
    #[test]
    fn test_engine_default_config() {
        let engine = ProjectContextEngine::new();
        assert!(engine.config().enabled);
        assert_eq!(engine.config().max_file_size, 1024 * 1024);
        assert!(!engine.config().scan_parents);
    }

    /// File-level memories are tagged and weighted as project context.
    #[test]
    fn test_file_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test\n\nContent".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc123".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };

        let memory = engine.file_to_memory(&file);

        assert_eq!(memory.memory_type, MemoryType::Context);
        assert!(memory.tags.contains(&"project-context".to_string()));
        assert!(memory.tags.contains(&"claude-md".to_string()));
        assert_eq!(memory.importance, 0.8);
    }

    /// Section-level memories render the title and record their parent id.
    #[test]
    fn test_section_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };

        let section = ParsedSection {
            title: "Guidelines".to_string(),
            content: "Follow these rules".to_string(),
            section_path: "Main > Guidelines".to_string(),
            section_index: 1,
            heading_level: 2,
            heading_anchor: "guidelines".to_string(),
            content_hash: "sha256:def".to_string(),
        };

        let memory = engine.section_to_memory(&section, &file, 123);

        assert!(memory.content.contains("# Guidelines"));
        assert!(memory.tags.contains(&"section".to_string()));
        assert_eq!(
            memory.metadata.get("parent_memory_id"),
            Some(&serde_json::Value::Number(123.into()))
        );
    }
}