1use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use sha2::{Digest, Sha256};
22use std::collections::HashMap;
23use std::fs;
24use std::path::{Path, PathBuf};
25use std::time::SystemTime;
26
27use crate::error::Result;
28use crate::types::{Memory, MemoryScope, MemoryTier, MemoryType, Visibility};
29
/// Settings that control how project instruction files are discovered and
/// turned into memories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectContextConfig {
    /// Master switch: when `false`, directory scans return no files.
    pub enabled: bool,
    /// Largest file size in bytes that will be read; larger files are skipped.
    pub max_file_size: u64,
    /// NOTE(review): presumably toggles splitting files into per-section
    /// memories; it is not read by the code visible here — confirm at the caller.
    pub extract_sections: bool,
    /// When `true`, scanning a directory also scans each of its ancestors.
    pub scan_parents: bool,
    /// Directory names to ignore.
    /// NOTE(review): not consulted by the scanner visible here — confirm where applied.
    pub ignore_dirs: Vec<String>,
    /// File patterns to ignore.
    /// NOTE(review): not consulted by the scanner visible here — confirm where applied.
    pub ignore_files: Vec<String>,
    /// Visibility assigned to every memory built from an instruction file.
    pub default_visibility: Visibility,
    /// NOTE(review): presumably a ranking bonus applied to project-context
    /// memories during search; not used in the code visible here — confirm.
    pub search_boost: f32,
}
54
55impl Default for ProjectContextConfig {
56 fn default() -> Self {
57 Self {
58 enabled: true,
59 max_file_size: 1024 * 1024, extract_sections: true,
61 scan_parents: false,
62 ignore_dirs: vec![
63 ".git".to_string(),
64 "target".to_string(),
65 "node_modules".to_string(),
66 "vendor".to_string(),
67 ".venv".to_string(),
68 "__pycache__".to_string(),
69 "dist".to_string(),
70 "build".to_string(),
71 ],
72 ignore_files: vec![
73 ".env*".to_string(),
74 "*.key".to_string(),
75 "*.pem".to_string(),
76 "*.p12".to_string(),
77 "secrets/*".to_string(),
78 ],
79 default_visibility: Visibility::Private,
80 search_boost: 0.2,
81 }
82 }
83}
84
/// Paths, relative to a scanned directory, that the scanner probes for
/// instruction files. Entries are checked in the order listed here.
pub const CORE_INSTRUCTION_FILES: &[&str] = &[
    "CLAUDE.md",
    "AGENTS.md",
    ".cursorrules",
    ".github/copilot-instructions.md",
    ".aider.conf.yml",
    "GEMINI.md",
    ".windsurfrules",
    "CONVENTIONS.md",
    "CODING_GUIDELINES.md",
];
101
/// Kind of instruction file, derived from the file's name.
///
/// Serialized with snake_case variant names (e.g. `claude_md`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InstructionFileType {
    /// `CLAUDE.md`
    ClaudeMd,
    /// `AGENTS.md`
    AgentsMd,
    /// `.cursorrules`
    CursorRules,
    /// `copilot-instructions.md`
    CopilotInstructions,
    /// `GEMINI.md`
    GeminiMd,
    /// `.aider.conf.yml`
    AiderConf,
    /// `CONVENTIONS.md`
    ConventionsMd,
    /// `.windsurfrules`
    WindsurfRules,
    /// `CODING_GUIDELINES.md` or `coding-guidelines.md`
    CodingGuidelines,
    /// Any file name that matches none of the known names.
    Custom,
}
121
122impl InstructionFileType {
123 pub fn from_filename(filename: &str) -> Self {
125 match filename.to_lowercase().as_str() {
126 "claude.md" => Self::ClaudeMd,
127 "agents.md" => Self::AgentsMd,
128 ".cursorrules" => Self::CursorRules,
129 "copilot-instructions.md" => Self::CopilotInstructions,
130 "gemini.md" => Self::GeminiMd,
131 ".aider.conf.yml" => Self::AiderConf,
132 "conventions.md" => Self::ConventionsMd,
133 ".windsurfrules" => Self::WindsurfRules,
134 "coding_guidelines.md" | "coding-guidelines.md" => Self::CodingGuidelines,
135 _ => Self::Custom,
136 }
137 }
138
139 pub fn as_tag(&self) -> &'static str {
141 match self {
142 Self::ClaudeMd => "claude-md",
143 Self::AgentsMd => "agents-md",
144 Self::CursorRules => "cursorrules",
145 Self::CopilotInstructions => "copilot-instructions",
146 Self::GeminiMd => "gemini-md",
147 Self::AiderConf => "aider-conf",
148 Self::ConventionsMd => "conventions-md",
149 Self::WindsurfRules => "windsurfrules",
150 Self::CodingGuidelines => "coding-guidelines",
151 Self::Custom => "custom-instructions",
152 }
153 }
154}
155
/// On-disk format of an instruction file, inferred from its name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileFormat {
    /// Name ends in `.md`.
    Markdown,
    /// Name ends in `.yml` or `.yaml`.
    Yaml,
    /// Anything else.
    PlainText,
}

impl FileFormat {
    /// Infers the format from the file name's suffix, ignoring case.
    pub fn from_filename(filename: &str) -> Self {
        // Recognised suffixes; any other name is treated as plain text.
        const SUFFIXES: [(&str, FileFormat); 3] = [
            (".md", FileFormat::Markdown),
            (".yml", FileFormat::Yaml),
            (".yaml", FileFormat::Yaml),
        ];

        let name = filename.to_lowercase();
        SUFFIXES
            .iter()
            .find(|(suffix, _)| name.ends_with(*suffix))
            .map(|&(_, format)| format)
            .unwrap_or(FileFormat::PlainText)
    }

    /// Returns the lower-case name of the format.
    pub fn as_str(&self) -> &'static str {
        match *self {
            FileFormat::Markdown => "markdown",
            FileFormat::Yaml => "yaml",
            FileFormat::PlainText => "plaintext",
        }
    }
}
186
/// An instruction file that was found on disk and read into memory.
#[derive(Debug, Clone)]
pub struct DiscoveredFile {
    /// Path the file was read from.
    pub path: PathBuf,
    /// Final component of `path`, or `"unknown"` when it is not valid UTF-8.
    pub filename: String,
    /// File length in bytes as reported by the file system.
    pub size: u64,
    /// Full text of the file.
    pub content: String,
    /// Kind of instruction file, classified from `filename`.
    pub file_type: InstructionFileType,
    /// Format inferred from `filename`; selects the parser.
    pub format: FileFormat,
    /// `sha256:`-prefixed hex digest of `content`.
    pub content_hash: String,
    /// Last-modified time; the Unix epoch when the platform cannot report one.
    pub mtime: SystemTime,
    /// Directory that was being scanned when this file was found.
    pub project_path: PathBuf,
}
209
/// Result of parsing one instruction file.
#[derive(Debug, Clone)]
pub struct ParsedInstructions {
    /// Sections extracted from the file, in document order.
    pub sections: Vec<ParsedSection>,
    /// The unmodified text that was parsed.
    pub raw_content: String,
    /// `sha256:`-prefixed hex digest of `raw_content`.
    pub file_hash: String,
}
220
/// One section extracted from an instruction file.
#[derive(Debug, Clone)]
pub struct ParsedSection {
    /// Heading text for Markdown; a fixed label for single-section formats.
    pub title: String,
    /// Body text of the section.
    pub content: String,
    /// Titles of the enclosing headings and this one, joined with `" > "`.
    pub section_path: String,
    /// Zero-based position among the sections produced for the file.
    pub section_index: usize,
    /// Number of `#` characters in the Markdown heading; `1` for
    /// single-section formats.
    pub heading_level: usize,
    /// URL-style slug derived from the title.
    pub heading_anchor: String,
    /// `sha256:`-prefixed hex digest identifying the section body.
    pub content_hash: String,
}
239
/// Serializable summary of one project scan.
///
/// NOTE(review): the code that fills this struct is not visible here; the
/// field descriptions below are inferred from the field names — confirm
/// against the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanResult {
    /// Path of the project that was scanned.
    pub project_path: String,
    /// Presumably the number of instruction files discovered.
    pub files_found: usize,
    /// Presumably the number of new memories stored.
    pub memories_created: usize,
    /// Presumably the number of existing memories refreshed.
    pub memories_updated: usize,
    /// Presumably the number of files skipped (e.g. over the size limit).
    pub files_skipped: usize,
    /// Human-readable error messages collected during the scan.
    pub errors: Vec<String>,
    /// Timestamp of the scan (UTC).
    pub scanned_at: DateTime<Utc>,
}
258
/// Turns the text of an instruction file into [`ParsedInstructions`].
///
/// Implementors must be `Send + Sync` so a parser can be shared across
/// threads.
pub trait InstructionFileParser: Send + Sync {
    /// Parses `content` (the full text of one file) into sections.
    fn parse(&self, content: &str) -> Result<ParsedInstructions>;
}
268
269pub struct MarkdownParser;
271
272impl InstructionFileParser for MarkdownParser {
273 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
274 let file_hash = hash_content(content);
275 let mut sections = Vec::new();
276 let mut current_section: Option<(String, String, usize, Vec<String>)> = None;
277 let mut section_index = 0;
278 let mut heading_stack: Vec<(usize, String)> = Vec::new();
279
280 for line in content.lines() {
281 if let Some((level, title)) = parse_markdown_heading(line) {
282 if let Some((title, content, level, path_parts)) = current_section.take() {
284 if !content.trim().is_empty() {
285 let section_path = path_parts.join(" > ");
286 sections.push(ParsedSection {
287 title: title.clone(),
288 content: content.trim().to_string(),
289 section_path,
290 section_index,
291 heading_level: level,
292 heading_anchor: slugify(&title),
293 content_hash: hash_content(&content),
294 });
295 section_index += 1;
296 }
297 }
298
299 while heading_stack
301 .last()
302 .map(|(l, _)| *l >= level)
303 .unwrap_or(false)
304 {
305 heading_stack.pop();
306 }
307 heading_stack.push((level, title.clone()));
308
309 let path_parts: Vec<String> =
311 heading_stack.iter().map(|(_, t)| t.clone()).collect();
312
313 current_section = Some((title, String::new(), level, path_parts));
314 } else if let Some((_, ref mut content, _, _)) = current_section {
315 content.push_str(line);
316 content.push('\n');
317 }
318 }
319
320 if let Some((title, content, level, path_parts)) = current_section {
322 if !content.trim().is_empty() {
323 let section_path = path_parts.join(" > ");
324 sections.push(ParsedSection {
325 title: title.clone(),
326 content: content.trim().to_string(),
327 section_path,
328 section_index,
329 heading_level: level,
330 heading_anchor: slugify(&title),
331 content_hash: hash_content(&content),
332 });
333 }
334 }
335
336 Ok(ParsedInstructions {
337 sections,
338 raw_content: content.to_string(),
339 file_hash,
340 })
341 }
342}
343
344pub struct YamlParser;
346
347impl InstructionFileParser for YamlParser {
348 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
349 let file_hash = hash_content(content);
350
351 let sections = vec![ParsedSection {
354 title: "Configuration".to_string(),
355 content: content.to_string(),
356 section_path: "Configuration".to_string(),
357 section_index: 0,
358 heading_level: 1,
359 heading_anchor: "configuration".to_string(),
360 content_hash: file_hash.clone(),
361 }];
362
363 Ok(ParsedInstructions {
364 sections,
365 raw_content: content.to_string(),
366 file_hash,
367 })
368 }
369}
370
371pub struct PlainTextParser;
373
374impl InstructionFileParser for PlainTextParser {
375 fn parse(&self, content: &str) -> Result<ParsedInstructions> {
376 let file_hash = hash_content(content);
377
378 let sections = vec![ParsedSection {
379 title: "Instructions".to_string(),
380 content: content.to_string(),
381 section_path: "Instructions".to_string(),
382 section_index: 0,
383 heading_level: 1,
384 heading_anchor: "instructions".to_string(),
385 content_hash: file_hash.clone(),
386 }];
387
388 Ok(ParsedInstructions {
389 sections,
390 raw_content: content.to_string(),
391 file_hash,
392 })
393 }
394}
395
396pub struct ProjectContextEngine {
402 config: ProjectContextConfig,
403 markdown_parser: MarkdownParser,
404 yaml_parser: YamlParser,
405 plaintext_parser: PlainTextParser,
406}
407
408impl ProjectContextEngine {
409 pub fn new() -> Self {
411 Self::with_config(ProjectContextConfig::default())
412 }
413
414 pub fn with_config(config: ProjectContextConfig) -> Self {
416 Self {
417 config,
418 markdown_parser: MarkdownParser,
419 yaml_parser: YamlParser,
420 plaintext_parser: PlainTextParser,
421 }
422 }
423
424 fn get_parser(&self, format: FileFormat) -> &dyn InstructionFileParser {
426 match format {
427 FileFormat::Markdown => &self.markdown_parser,
428 FileFormat::Yaml => &self.yaml_parser,
429 FileFormat::PlainText => &self.plaintext_parser,
430 }
431 }
432
433 pub fn scan_directory(&self, path: &Path) -> Result<Vec<DiscoveredFile>> {
437 let (files, _skipped) = self.scan_directory_with_stats(path)?;
438 Ok(files)
439 }
440
441 pub fn scan_directory_with_stats(&self, path: &Path) -> Result<(Vec<DiscoveredFile>, usize)> {
444 if !self.config.enabled {
445 return Ok((Vec::new(), 0));
446 }
447
448 let mut discovered = Vec::new();
449 let mut skipped = 0;
450 let project_path = path.to_path_buf();
451
452 for pattern in CORE_INSTRUCTION_FILES {
454 let file_path = path.join(pattern);
455 if file_path.exists() && file_path.is_file() {
456 match self.read_file(&file_path, &project_path) {
457 Ok(Some(file)) => discovered.push(file),
458 Ok(None) => skipped += 1, Err(e) => {
460 tracing::warn!("Error reading {}: {}", file_path.display(), e);
461 }
462 }
463 }
464 }
465
466 if self.config.scan_parents {
468 if let Some(parent) = path.parent() {
469 if parent != path {
470 let (parent_files, parent_skipped) = self.scan_directory_with_stats(parent)?;
471 discovered.extend(parent_files);
472 skipped += parent_skipped;
473 }
474 }
475 }
476
477 Ok((discovered, skipped))
478 }
479
480 fn read_file(&self, path: &Path, project_path: &Path) -> Result<Option<DiscoveredFile>> {
482 let metadata = fs::metadata(path)?;
483 let size = metadata.len();
484
485 if size > self.config.max_file_size {
487 tracing::info!(
488 "Skipping {} (size {} > max {})",
489 path.display(),
490 size,
491 self.config.max_file_size
492 );
493 return Ok(None);
494 }
495
496 let content = fs::read_to_string(path)?;
497 let filename = path
498 .file_name()
499 .and_then(|n| n.to_str())
500 .unwrap_or("unknown")
501 .to_string();
502
503 let file_type = InstructionFileType::from_filename(&filename);
504 let format = FileFormat::from_filename(&filename);
505 let content_hash = hash_content(&content);
506 let mtime = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
507
508 Ok(Some(DiscoveredFile {
509 path: path.to_path_buf(),
510 filename,
511 size,
512 content,
513 file_type,
514 format,
515 content_hash,
516 mtime,
517 project_path: project_path.to_path_buf(),
518 }))
519 }
520
521 pub fn parse_file(&self, file: &DiscoveredFile) -> Result<ParsedInstructions> {
523 let parser = self.get_parser(file.format);
524 parser.parse(&file.content)
525 }
526
527 pub fn file_to_memory(&self, file: &DiscoveredFile) -> Memory {
529 let mut metadata = HashMap::new();
530 metadata.insert(
531 "source_file".to_string(),
532 serde_json::Value::String(file.path.to_string_lossy().to_string()),
533 );
534 metadata.insert(
535 "file_type".to_string(),
536 serde_json::Value::String(file.file_type.as_tag().to_string()),
537 );
538 metadata.insert(
539 "project_path".to_string(),
540 serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
541 );
542 metadata.insert(
543 "file_hash".to_string(),
544 serde_json::Value::String(file.content_hash.clone()),
545 );
546 let mtime_rfc3339 = file
548 .mtime
549 .duration_since(std::time::UNIX_EPOCH)
550 .map(|d| DateTime::<Utc>::from(std::time::UNIX_EPOCH + d).to_rfc3339())
551 .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string());
552 metadata.insert(
553 "file_mtime".to_string(),
554 serde_json::Value::String(mtime_rfc3339),
555 );
556
557 Memory {
558 id: 0,
559 content: file.content.clone(),
560 memory_type: MemoryType::Context,
561 tags: vec![
562 "project-context".to_string(),
563 file.file_type.as_tag().to_string(),
564 ],
565 metadata,
566 importance: 0.8, access_count: 0,
568 created_at: Utc::now(),
569 updated_at: Utc::now(),
570 last_accessed_at: None,
571 owner_id: None,
572 visibility: self.config.default_visibility,
573 scope: MemoryScope::Global,
574 workspace: "default".to_string(),
575 tier: MemoryTier::Permanent,
576 version: 1,
577 has_embedding: false,
578 expires_at: None,
579 content_hash: None, event_time: None,
581 event_duration_seconds: None,
582 trigger_pattern: None,
583 procedure_success_count: 0,
584 procedure_failure_count: 0,
585 summary_of_id: None,
586 lifecycle_state: crate::types::LifecycleState::Active,
587 }
588 }
589
590 pub fn section_to_memory(
592 &self,
593 section: &ParsedSection,
594 file: &DiscoveredFile,
595 parent_id: i64,
596 ) -> Memory {
597 let mut metadata = HashMap::new();
598 metadata.insert(
599 "source_file".to_string(),
600 serde_json::Value::String(file.path.to_string_lossy().to_string()),
601 );
602 metadata.insert(
603 "file_type".to_string(),
604 serde_json::Value::String(file.file_type.as_tag().to_string()),
605 );
606 metadata.insert(
607 "project_path".to_string(),
608 serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
609 );
610 metadata.insert(
611 "section_path".to_string(),
612 serde_json::Value::String(section.section_path.clone()),
613 );
614 metadata.insert(
615 "section_index".to_string(),
616 serde_json::json!(section.section_index),
617 );
618 metadata.insert(
619 "content_hash".to_string(),
620 serde_json::Value::String(section.content_hash.clone()),
621 );
622 metadata.insert(
623 "heading_anchor".to_string(),
624 serde_json::Value::String(section.heading_anchor.clone()),
625 );
626 metadata.insert("parent_memory_id".to_string(), serde_json::json!(parent_id));
627
628 let content = format!("# {}\n\n{}", section.title, section.content);
630
631 Memory {
632 id: 0,
633 content,
634 memory_type: MemoryType::Context,
635 tags: vec![
636 "project-context".to_string(),
637 "section".to_string(),
638 file.file_type.as_tag().to_string(),
639 ],
640 metadata,
641 importance: 0.7,
642 access_count: 0,
643 created_at: Utc::now(),
644 updated_at: Utc::now(),
645 last_accessed_at: None,
646 owner_id: None,
647 visibility: self.config.default_visibility,
648 scope: MemoryScope::Global,
649 workspace: "default".to_string(),
650 tier: MemoryTier::Permanent,
651 version: 1,
652 has_embedding: false,
653 expires_at: None,
654 content_hash: None, event_time: None,
656 event_duration_seconds: None,
657 trigger_pattern: None,
658 procedure_success_count: 0,
659 procedure_failure_count: 0,
660 summary_of_id: None,
661 lifecycle_state: crate::types::LifecycleState::Active,
662 }
663 }
664
665 pub fn config(&self) -> &ProjectContextConfig {
667 &self.config
668 }
669}
670
671impl Default for ProjectContextEngine {
672 fn default() -> Self {
673 Self::new()
674 }
675}
676
677fn hash_content(content: &str) -> String {
683 let mut hasher = Sha256::new();
684 hasher.update(content.as_bytes());
685 format!("sha256:{}", hex::encode(hasher.finalize()))
686}
687
/// Parses an ATX-style Markdown heading.
///
/// Returns `Some((level, title))` for a line made of one to six `#`
/// characters, at least one whitespace character, and a non-empty title.
/// Leading indentation is ignored and the title is trimmed.
///
/// Returns `None` for everything else. In particular a `#` run that is not
/// followed by whitespace (`#hashtag`, `#!/bin/sh`, `#42 fixed`) is not a
/// heading, matching the CommonMark rule for ATX headings.
fn parse_markdown_heading(line: &str) -> Option<(usize, String)> {
    let trimmed = line.trim_start();

    // '#' is a single byte, so the count doubles as a byte offset below.
    let level = trimmed.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    let rest = &trimmed[level..];
    // The opening run must be separated from the title by whitespace; this
    // also rejects a bare run of '#' with nothing after it.
    if !rest.starts_with(char::is_whitespace) {
        return None;
    }

    let title = rest.trim();
    if title.is_empty() {
        return None;
    }

    Some((level, title.to_string()))
}
707
/// Turns a heading title into a URL-style anchor.
///
/// The title is lower-cased, every run of non-alphanumeric characters
/// becomes a single `-`, and no `-` appears at the start or end.
fn slugify(title: &str) -> String {
    let lowered = title.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());

    let words = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty());

    for word in words {
        if !slug.is_empty() {
            slug.push('-');
        }
        slug.push_str(word);
    }

    slug
}
720
/// Unit tests for file-type detection, the parsers, the helper functions and
/// the file/section-to-memory conversions.
#[cfg(test)]
mod tests {
    use super::*;

    /// Known file names map to their variants; anything else is `Custom`.
    #[test]
    fn test_instruction_file_type_detection() {
        assert_eq!(
            InstructionFileType::from_filename("CLAUDE.md"),
            InstructionFileType::ClaudeMd
        );
        assert_eq!(
            InstructionFileType::from_filename(".cursorrules"),
            InstructionFileType::CursorRules
        );
        assert_eq!(
            InstructionFileType::from_filename(".aider.conf.yml"),
            InstructionFileType::AiderConf
        );
        assert_eq!(
            InstructionFileType::from_filename("random.txt"),
            InstructionFileType::Custom
        );
    }

    /// The format is inferred from the file-name suffix.
    #[test]
    fn test_file_format_detection() {
        assert_eq!(FileFormat::from_filename("CLAUDE.md"), FileFormat::Markdown);
        assert_eq!(
            FileFormat::from_filename(".aider.conf.yml"),
            FileFormat::Yaml
        );
        assert_eq!(
            FileFormat::from_filename(".cursorrules"),
            FileFormat::PlainText
        );
    }

    /// Headings yield `(level, title)`; other lines and a bare `#` yield `None`.
    #[test]
    fn test_markdown_heading_parsing() {
        assert_eq!(
            parse_markdown_heading("# Title"),
            Some((1, "Title".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("## Subtitle"),
            Some((2, "Subtitle".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("### Deep Heading"),
            Some((3, "Deep Heading".to_string()))
        );
        assert_eq!(parse_markdown_heading("Not a heading"), None);
        assert_eq!(parse_markdown_heading("#"), None);
    }

    /// Slugs are lower-case with single dashes between words.
    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("Unit Testing"), "unit-testing");
        assert_eq!(slugify("API & REST"), "api-rest");
        assert_eq!(slugify(" Multiple Spaces "), "multiple-spaces");
    }

    /// Hashing is deterministic, content-sensitive and prefixed.
    #[test]
    fn test_hash_content() {
        let hash1 = hash_content("hello");
        let hash2 = hash_content("hello");
        let hash3 = hash_content("world");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert!(hash1.starts_with("sha256:"));
    }

    /// Each heading with a body becomes a section with a hierarchical path.
    #[test]
    fn test_markdown_parser() {
        let content = r#"# Main Title

Some intro text.

## Section One

Content of section one.

## Section Two

Content of section two.

### Subsection

Nested content.
"#;

        let parser = MarkdownParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 4);
        assert_eq!(result.sections[0].title, "Main Title");
        assert_eq!(result.sections[0].section_path, "Main Title");
        assert_eq!(result.sections[1].title, "Section One");
        assert_eq!(result.sections[1].section_path, "Main Title > Section One");
        assert_eq!(result.sections[2].title, "Section Two");
        assert_eq!(result.sections[3].title, "Subsection");
        assert_eq!(
            result.sections[3].section_path,
            "Main Title > Section Two > Subsection"
        );
    }

    /// YAML files are wrapped in a single `Configuration` section.
    #[test]
    fn test_yaml_parser() {
        let content = "key: value\nother: data";
        let parser = YamlParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Configuration");
    }

    /// Plain-text files are wrapped in a single `Instructions` section.
    #[test]
    fn test_plaintext_parser() {
        let content = "Some plain text instructions";
        let parser = PlainTextParser;
        let result = parser.parse(content).unwrap();

        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Instructions");
    }

    /// `new()` uses the default configuration.
    #[test]
    fn test_engine_default_config() {
        let engine = ProjectContextEngine::new();
        assert!(engine.config().enabled);
        assert_eq!(engine.config().max_file_size, 1024 * 1024);
        assert!(!engine.config().scan_parents);
    }

    /// Whole-file memories carry the expected type, tags and importance.
    #[test]
    fn test_file_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test\n\nContent".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc123".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };

        let memory = engine.file_to_memory(&file);

        assert_eq!(memory.memory_type, MemoryType::Context);
        assert!(memory.tags.contains(&"project-context".to_string()));
        assert!(memory.tags.contains(&"claude-md".to_string()));
        assert_eq!(memory.importance, 0.8);
    }

    /// Section memories include the heading, the `section` tag and the parent id.
    #[test]
    fn test_section_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };

        let section = ParsedSection {
            title: "Guidelines".to_string(),
            content: "Follow these rules".to_string(),
            section_path: "Main > Guidelines".to_string(),
            section_index: 1,
            heading_level: 2,
            heading_anchor: "guidelines".to_string(),
            content_hash: "sha256:def".to_string(),
        };

        let memory = engine.section_to_memory(&section, &file, 123);

        assert!(memory.content.contains("# Guidelines"));
        assert!(memory.tags.contains(&"section".to_string()));
        assert_eq!(
            memory.metadata.get("parent_memory_id"),
            Some(&serde_json::Value::Number(123.into()))
        );
    }
}