use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use crate::error::Result;
use crate::types::{Memory, MemoryScope, MemoryTier, MemoryType, Visibility};
/// Configuration for scanning a project's AI-assistant instruction files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectContextConfig {
    /// Master switch; when false, directory scans return no files.
    pub enabled: bool,
    /// Files larger than this many bytes are skipped during a scan.
    pub max_file_size: u64,
    /// Whether parsed files should also be split into per-section memories.
    /// NOTE(review): not consulted anywhere in this module — confirm callers use it.
    pub extract_sections: bool,
    /// Whether a scan also walks ancestor directories of the scanned path.
    pub scan_parents: bool,
    /// Directory names to ignore. NOTE(review): not consulted by this module's
    /// scan (which only probes known filenames) — verify where this is applied.
    pub ignore_dirs: Vec<String>,
    /// Glob-style patterns of files to ignore (secret material by default).
    /// NOTE(review): likewise not consulted in this module.
    pub ignore_files: Vec<String>,
    /// Visibility assigned to memories created from discovered files.
    pub default_visibility: Visibility,
    /// Relevance boost for project-context memories.
    /// NOTE(review): consumed outside this module — confirm semantics with search code.
    pub search_boost: f32,
}
impl Default for ProjectContextConfig {
fn default() -> Self {
Self {
enabled: true,
max_file_size: 1024 * 1024, extract_sections: true,
scan_parents: false,
ignore_dirs: vec![
".git".to_string(),
"target".to_string(),
"node_modules".to_string(),
"vendor".to_string(),
".venv".to_string(),
"__pycache__".to_string(),
"dist".to_string(),
"build".to_string(),
],
ignore_files: vec![
".env*".to_string(),
"*.key".to_string(),
"*.pem".to_string(),
"*.p12".to_string(),
"secrets/*".to_string(),
],
default_visibility: Visibility::Private,
search_boost: 0.2,
}
}
}
/// Well-known AI-assistant instruction files probed (relative to the scanned
/// directory) during a scan. Entries may include a subdirectory component,
/// e.g. `.github/copilot-instructions.md`.
pub const CORE_INSTRUCTION_FILES: &[&str] = &[
    "CLAUDE.md",
    "AGENTS.md",
    ".cursorrules",
    ".github/copilot-instructions.md",
    ".aider.conf.yml",
    "GEMINI.md",
    ".windsurfrules",
    "CONVENTIONS.md",
    "CODING_GUIDELINES.md",
];
/// The kind of AI-assistant instruction file, identified by filename
/// (see [`InstructionFileType::from_filename`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InstructionFileType {
    ClaudeMd,
    AgentsMd,
    CursorRules,
    CopilotInstructions,
    GeminiMd,
    AiderConf,
    ConventionsMd,
    WindsurfRules,
    CodingGuidelines,
    /// Any instruction file whose name is not one of the known patterns.
    Custom,
}
impl InstructionFileType {
pub fn from_filename(filename: &str) -> Self {
match filename.to_lowercase().as_str() {
"claude.md" => Self::ClaudeMd,
"agents.md" => Self::AgentsMd,
".cursorrules" => Self::CursorRules,
"copilot-instructions.md" => Self::CopilotInstructions,
"gemini.md" => Self::GeminiMd,
".aider.conf.yml" => Self::AiderConf,
"conventions.md" => Self::ConventionsMd,
".windsurfrules" => Self::WindsurfRules,
"coding_guidelines.md" | "coding-guidelines.md" => Self::CodingGuidelines,
_ => Self::Custom,
}
}
pub fn as_tag(&self) -> &'static str {
match self {
Self::ClaudeMd => "claude-md",
Self::AgentsMd => "agents-md",
Self::CursorRules => "cursorrules",
Self::CopilotInstructions => "copilot-instructions",
Self::GeminiMd => "gemini-md",
Self::AiderConf => "aider-conf",
Self::ConventionsMd => "conventions-md",
Self::WindsurfRules => "windsurfrules",
Self::CodingGuidelines => "coding-guidelines",
Self::Custom => "custom-instructions",
}
}
}
/// How a discovered file's text should be parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileFormat {
    /// `.md` files: split into heading-delimited sections.
    Markdown,
    /// `.yml`/`.yaml` files: kept whole as one "Configuration" section.
    Yaml,
    /// Everything else: kept whole as one "Instructions" section.
    PlainText,
}
impl FileFormat {
pub fn from_filename(filename: &str) -> Self {
let lower = filename.to_lowercase();
if lower.ends_with(".md") {
Self::Markdown
} else if lower.ends_with(".yml") || lower.ends_with(".yaml") {
Self::Yaml
} else {
Self::PlainText
}
}
pub fn as_str(&self) -> &'static str {
match self {
Self::Markdown => "markdown",
Self::Yaml => "yaml",
Self::PlainText => "plaintext",
}
}
}
/// An instruction file found on disk, with its content already loaded.
#[derive(Debug, Clone)]
pub struct DiscoveredFile {
    /// Path to the file itself.
    pub path: PathBuf,
    /// Final path component, e.g. "CLAUDE.md".
    pub filename: String,
    /// File size in bytes.
    pub size: u64,
    /// Full UTF-8 file contents.
    pub content: String,
    /// Known instruction-file kind, derived from the filename.
    pub file_type: InstructionFileType,
    /// Parse format, derived from the filename extension.
    pub format: FileFormat,
    /// `"sha256:<hex>"` digest of `content`.
    pub content_hash: String,
    /// Filesystem modification time (`UNIX_EPOCH` when unavailable).
    pub mtime: SystemTime,
    /// Directory that was being scanned when this file was found.
    pub project_path: PathBuf,
}
/// Output of an [`InstructionFileParser`]: the extracted sections plus the
/// original text and its hash.
#[derive(Debug, Clone)]
pub struct ParsedInstructions {
    /// Sections in document order; may be a single section for non-Markdown.
    pub sections: Vec<ParsedSection>,
    /// The unmodified input text.
    pub raw_content: String,
    /// `"sha256:<hex>"` digest of the whole input.
    pub file_hash: String,
}
/// One heading-delimited section extracted from an instruction file.
#[derive(Debug, Clone)]
pub struct ParsedSection {
    /// Heading text (without the leading `#`s).
    pub title: String,
    /// Trimmed body text beneath the heading.
    pub content: String,
    /// Breadcrumb of ancestor headings joined with " > ",
    /// e.g. "Main Title > Section One".
    pub section_path: String,
    /// Zero-based position among the emitted sections.
    pub section_index: usize,
    /// Markdown heading level (1-6).
    pub heading_level: usize,
    /// Slugified form of `title` (lowercase, dash-separated).
    pub heading_anchor: String,
    /// `"sha256:<hex>"` digest of the untrimmed section body.
    pub content_hash: String,
}
/// Summary of one project scan. NOTE(review): this type is not constructed
/// anywhere in this module — the memory counters are presumably filled in by
/// the caller that persists memories; confirm against call sites.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanResult {
    /// Directory that was scanned, as a string.
    pub project_path: String,
    /// Instruction files discovered.
    pub files_found: usize,
    /// New memories written for this scan.
    pub memories_created: usize,
    /// Existing memories refreshed for this scan.
    pub memories_updated: usize,
    /// Files skipped (e.g. over the size limit).
    pub files_skipped: usize,
    /// Human-readable error descriptions accumulated during the scan.
    pub errors: Vec<String>,
    /// When the scan completed.
    pub scanned_at: DateTime<Utc>,
}
/// Parses raw instruction-file text into structured sections.
pub trait InstructionFileParser: Send + Sync {
    /// Parses `content`, returning the extracted sections together with the
    /// raw text and a hash of the whole input.
    fn parse(&self, content: &str) -> Result<ParsedInstructions>;
}
pub struct MarkdownParser;
impl InstructionFileParser for MarkdownParser {
fn parse(&self, content: &str) -> Result<ParsedInstructions> {
let file_hash = hash_content(content);
let mut sections = Vec::new();
let mut current_section: Option<(String, String, usize, Vec<String>)> = None;
let mut section_index = 0;
let mut heading_stack: Vec<(usize, String)> = Vec::new();
for line in content.lines() {
if let Some((level, title)) = parse_markdown_heading(line) {
if let Some((title, content, level, path_parts)) = current_section.take() {
if !content.trim().is_empty() {
let section_path = path_parts.join(" > ");
sections.push(ParsedSection {
title: title.clone(),
content: content.trim().to_string(),
section_path,
section_index,
heading_level: level,
heading_anchor: slugify(&title),
content_hash: hash_content(&content),
});
section_index += 1;
}
}
while heading_stack
.last()
.map(|(l, _)| *l >= level)
.unwrap_or(false)
{
heading_stack.pop();
}
heading_stack.push((level, title.clone()));
let path_parts: Vec<String> =
heading_stack.iter().map(|(_, t)| t.clone()).collect();
current_section = Some((title, String::new(), level, path_parts));
} else if let Some((_, ref mut content, _, _)) = current_section {
content.push_str(line);
content.push('\n');
}
}
if let Some((title, content, level, path_parts)) = current_section {
if !content.trim().is_empty() {
let section_path = path_parts.join(" > ");
sections.push(ParsedSection {
title: title.clone(),
content: content.trim().to_string(),
section_path,
section_index,
heading_level: level,
heading_anchor: slugify(&title),
content_hash: hash_content(&content),
});
}
}
Ok(ParsedInstructions {
sections,
raw_content: content.to_string(),
file_hash,
})
}
}
/// Handles YAML instruction files (e.g. `.aider.conf.yml`).
pub struct YamlParser;

impl InstructionFileParser for YamlParser {
    /// YAML is not split up; the entire document becomes a single
    /// "Configuration" section whose hash equals the file hash.
    fn parse(&self, content: &str) -> Result<ParsedInstructions> {
        let file_hash = hash_content(content);
        let section = ParsedSection {
            title: "Configuration".to_string(),
            content: content.to_string(),
            section_path: "Configuration".to_string(),
            section_index: 0,
            heading_level: 1,
            heading_anchor: "configuration".to_string(),
            content_hash: file_hash.clone(),
        };
        Ok(ParsedInstructions {
            sections: vec![section],
            raw_content: content.to_string(),
            file_hash,
        })
    }
}
/// Handles instruction files with no recognized structure (e.g. `.cursorrules`).
pub struct PlainTextParser;

impl InstructionFileParser for PlainTextParser {
    /// Plain text is not split up; the whole file becomes a single
    /// "Instructions" section whose hash equals the file hash.
    fn parse(&self, content: &str) -> Result<ParsedInstructions> {
        let file_hash = hash_content(content);
        let section = ParsedSection {
            title: "Instructions".to_string(),
            content: content.to_string(),
            section_path: "Instructions".to_string(),
            section_index: 0,
            heading_level: 1,
            heading_anchor: "instructions".to_string(),
            content_hash: file_hash.clone(),
        };
        Ok(ParsedInstructions {
            sections: vec![section],
            raw_content: content.to_string(),
            file_hash,
        })
    }
}
/// Discovers project instruction files, parses them, and converts them
/// into [`Memory`] records.
pub struct ProjectContextEngine {
    /// Scan behavior settings.
    config: ProjectContextConfig,
    // One stateless parser per supported file format.
    markdown_parser: MarkdownParser,
    yaml_parser: YamlParser,
    plaintext_parser: PlainTextParser,
}
impl ProjectContextEngine {
pub fn new() -> Self {
Self::with_config(ProjectContextConfig::default())
}
pub fn with_config(config: ProjectContextConfig) -> Self {
Self {
config,
markdown_parser: MarkdownParser,
yaml_parser: YamlParser,
plaintext_parser: PlainTextParser,
}
}
fn get_parser(&self, format: FileFormat) -> &dyn InstructionFileParser {
match format {
FileFormat::Markdown => &self.markdown_parser,
FileFormat::Yaml => &self.yaml_parser,
FileFormat::PlainText => &self.plaintext_parser,
}
}
pub fn scan_directory(&self, path: &Path) -> Result<Vec<DiscoveredFile>> {
let (files, _skipped) = self.scan_directory_with_stats(path)?;
Ok(files)
}
pub fn scan_directory_with_stats(&self, path: &Path) -> Result<(Vec<DiscoveredFile>, usize)> {
if !self.config.enabled {
return Ok((Vec::new(), 0));
}
let mut discovered = Vec::new();
let mut skipped = 0;
let project_path = path.to_path_buf();
for pattern in CORE_INSTRUCTION_FILES {
let file_path = path.join(pattern);
if file_path.exists() && file_path.is_file() {
match self.read_file(&file_path, &project_path) {
Ok(Some(file)) => discovered.push(file),
Ok(None) => skipped += 1, Err(e) => {
tracing::warn!("Error reading {}: {}", file_path.display(), e);
}
}
}
}
if self.config.scan_parents {
if let Some(parent) = path.parent() {
if parent != path {
let (parent_files, parent_skipped) = self.scan_directory_with_stats(parent)?;
discovered.extend(parent_files);
skipped += parent_skipped;
}
}
}
Ok((discovered, skipped))
}
fn read_file(&self, path: &Path, project_path: &Path) -> Result<Option<DiscoveredFile>> {
let metadata = fs::metadata(path)?;
let size = metadata.len();
if size > self.config.max_file_size {
tracing::info!(
"Skipping {} (size {} > max {})",
path.display(),
size,
self.config.max_file_size
);
return Ok(None);
}
let content = fs::read_to_string(path)?;
let filename = path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
.to_string();
let file_type = InstructionFileType::from_filename(&filename);
let format = FileFormat::from_filename(&filename);
let content_hash = hash_content(&content);
let mtime = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
Ok(Some(DiscoveredFile {
path: path.to_path_buf(),
filename,
size,
content,
file_type,
format,
content_hash,
mtime,
project_path: project_path.to_path_buf(),
}))
}
pub fn parse_file(&self, file: &DiscoveredFile) -> Result<ParsedInstructions> {
let parser = self.get_parser(file.format);
parser.parse(&file.content)
}
pub fn file_to_memory(&self, file: &DiscoveredFile) -> Memory {
let mut metadata = HashMap::new();
metadata.insert(
"source_file".to_string(),
serde_json::Value::String(file.path.to_string_lossy().to_string()),
);
metadata.insert(
"file_type".to_string(),
serde_json::Value::String(file.file_type.as_tag().to_string()),
);
metadata.insert(
"project_path".to_string(),
serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
);
metadata.insert(
"file_hash".to_string(),
serde_json::Value::String(file.content_hash.clone()),
);
let mtime_rfc3339 = file
.mtime
.duration_since(std::time::UNIX_EPOCH)
.map(|d| DateTime::<Utc>::from(std::time::UNIX_EPOCH + d).to_rfc3339())
.unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string());
metadata.insert(
"file_mtime".to_string(),
serde_json::Value::String(mtime_rfc3339),
);
Memory {
id: 0,
content: file.content.clone(),
memory_type: MemoryType::Context,
tags: vec![
"project-context".to_string(),
file.file_type.as_tag().to_string(),
],
metadata,
importance: 0.8, access_count: 0,
created_at: Utc::now(),
updated_at: Utc::now(),
last_accessed_at: None,
owner_id: None,
visibility: self.config.default_visibility,
scope: MemoryScope::Global,
workspace: "default".to_string(),
tier: MemoryTier::Permanent,
version: 1,
has_embedding: false,
expires_at: None,
content_hash: None, event_time: None,
event_duration_seconds: None,
trigger_pattern: None,
procedure_success_count: 0,
procedure_failure_count: 0,
summary_of_id: None,
lifecycle_state: crate::types::LifecycleState::Active,
media_url: None,
}
}
pub fn section_to_memory(
&self,
section: &ParsedSection,
file: &DiscoveredFile,
parent_id: i64,
) -> Memory {
let mut metadata = HashMap::new();
metadata.insert(
"source_file".to_string(),
serde_json::Value::String(file.path.to_string_lossy().to_string()),
);
metadata.insert(
"file_type".to_string(),
serde_json::Value::String(file.file_type.as_tag().to_string()),
);
metadata.insert(
"project_path".to_string(),
serde_json::Value::String(file.project_path.to_string_lossy().to_string()),
);
metadata.insert(
"section_path".to_string(),
serde_json::Value::String(section.section_path.clone()),
);
metadata.insert(
"section_index".to_string(),
serde_json::json!(section.section_index),
);
metadata.insert(
"content_hash".to_string(),
serde_json::Value::String(section.content_hash.clone()),
);
metadata.insert(
"heading_anchor".to_string(),
serde_json::Value::String(section.heading_anchor.clone()),
);
metadata.insert("parent_memory_id".to_string(), serde_json::json!(parent_id));
let content = format!("# {}\n\n{}", section.title, section.content);
Memory {
id: 0,
content,
memory_type: MemoryType::Context,
tags: vec![
"project-context".to_string(),
"section".to_string(),
file.file_type.as_tag().to_string(),
],
metadata,
importance: 0.7,
access_count: 0,
created_at: Utc::now(),
updated_at: Utc::now(),
last_accessed_at: None,
owner_id: None,
visibility: self.config.default_visibility,
scope: MemoryScope::Global,
workspace: "default".to_string(),
tier: MemoryTier::Permanent,
version: 1,
has_embedding: false,
expires_at: None,
content_hash: None, event_time: None,
event_duration_seconds: None,
trigger_pattern: None,
procedure_success_count: 0,
procedure_failure_count: 0,
summary_of_id: None,
lifecycle_state: crate::types::LifecycleState::Active,
media_url: None,
}
}
pub fn config(&self) -> &ProjectContextConfig {
&self.config
}
}
impl Default for ProjectContextEngine {
fn default() -> Self {
Self::new()
}
}
/// Returns the SHA-256 digest of `content` formatted as `"sha256:<hex>"`.
fn hash_content(content: &str) -> String {
    let digest = Sha256::digest(content.as_bytes());
    format!("sha256:{}", hex::encode(digest))
}
/// Recognizes an ATX-style Markdown heading (`#` through `######`).
///
/// Returns the heading level and trimmed title, or `None` when the line is
/// not a heading, uses more than six `#`s, or has an empty title. Leading
/// whitespace before the `#`s is tolerated, and a space after them is not
/// required ("#Title" parses as a level-1 heading).
fn parse_markdown_heading(line: &str) -> Option<(usize, String)> {
    let text = line.trim_start();
    // '#' is ASCII, so this byte count equals the char count and is a safe
    // slice boundary below.
    let level = text.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }
    let title = text[level..].trim();
    if title.is_empty() {
        None
    } else {
        Some((level, title.to_string()))
    }
}
/// Converts a heading title to a lowercase, dash-separated anchor slug.
///
/// Every run of non-alphanumeric characters collapses to a single dash, and
/// leading/trailing dashes are removed ("API & REST" -> "api-rest").
fn slugify(title: &str) -> String {
    // Lowercase first, then classify — order matters for characters whose
    // lowercase expansion differs from the original.
    let lowered = title.to_lowercase();
    let dashed: String = lowered
        .chars()
        .map(|c| if c.is_alphanumeric() { c } else { '-' })
        .collect();
    let parts: Vec<&str> = dashed.split('-').filter(|part| !part.is_empty()).collect();
    parts.join("-")
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_instruction_file_type_detection() {
        assert_eq!(
            InstructionFileType::from_filename("CLAUDE.md"),
            InstructionFileType::ClaudeMd
        );
        assert_eq!(
            InstructionFileType::from_filename(".cursorrules"),
            InstructionFileType::CursorRules
        );
        assert_eq!(
            InstructionFileType::from_filename(".aider.conf.yml"),
            InstructionFileType::AiderConf
        );
        assert_eq!(
            InstructionFileType::from_filename("random.txt"),
            InstructionFileType::Custom
        );
    }

    #[test]
    fn test_file_format_detection() {
        assert_eq!(FileFormat::from_filename("CLAUDE.md"), FileFormat::Markdown);
        assert_eq!(
            FileFormat::from_filename(".aider.conf.yml"),
            FileFormat::Yaml
        );
        assert_eq!(
            FileFormat::from_filename(".cursorrules"),
            FileFormat::PlainText
        );
    }

    #[test]
    fn test_markdown_heading_parsing() {
        assert_eq!(
            parse_markdown_heading("# Title"),
            Some((1, "Title".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("## Subtitle"),
            Some((2, "Subtitle".to_string()))
        );
        assert_eq!(
            parse_markdown_heading("### Deep Heading"),
            Some((3, "Deep Heading".to_string()))
        );
        assert_eq!(parse_markdown_heading("Not a heading"), None);
        // A bare "#" has an empty title and is rejected.
        assert_eq!(parse_markdown_heading("#"), None);
    }

    #[test]
    fn test_slugify() {
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("Unit Testing"), "unit-testing");
        assert_eq!(slugify("API & REST"), "api-rest");
        assert_eq!(slugify(" Multiple Spaces "), "multiple-spaces");
    }

    #[test]
    fn test_hash_content() {
        let hash1 = hash_content("hello");
        let hash2 = hash_content("hello");
        let hash3 = hash_content("world");
        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert!(hash1.starts_with("sha256:"));
    }

    #[test]
    fn test_markdown_parser() {
        // The raw string is intentionally unindented: its lines are literal
        // Markdown content.
        let content = r#"# Main Title
Some intro text.
## Section One
Content of section one.
## Section Two
Content of section two.
### Subsection
Nested content.
"#;
        let parser = MarkdownParser;
        let result = parser.parse(content).unwrap();
        assert_eq!(result.sections.len(), 4);
        assert_eq!(result.sections[0].title, "Main Title");
        assert_eq!(result.sections[0].section_path, "Main Title");
        assert_eq!(result.sections[1].title, "Section One");
        assert_eq!(result.sections[1].section_path, "Main Title > Section One");
        assert_eq!(result.sections[2].title, "Section Two");
        assert_eq!(result.sections[3].title, "Subsection");
        assert_eq!(
            result.sections[3].section_path,
            "Main Title > Section Two > Subsection"
        );
    }

    #[test]
    fn test_yaml_parser() {
        let content = "key: value\nother: data";
        let parser = YamlParser;
        let result = parser.parse(content).unwrap();
        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Configuration");
    }

    #[test]
    fn test_plaintext_parser() {
        let content = "Some plain text instructions";
        let parser = PlainTextParser;
        let result = parser.parse(content).unwrap();
        assert_eq!(result.sections.len(), 1);
        assert_eq!(result.sections[0].title, "Instructions");
    }

    #[test]
    fn test_engine_default_config() {
        let engine = ProjectContextEngine::new();
        assert!(engine.config().enabled);
        assert_eq!(engine.config().max_file_size, 1024 * 1024);
        assert!(!engine.config().scan_parents);
    }

    #[test]
    fn test_file_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test\n\nContent".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc123".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };
        let memory = engine.file_to_memory(&file);
        assert_eq!(memory.memory_type, MemoryType::Context);
        assert!(memory.tags.contains(&"project-context".to_string()));
        assert!(memory.tags.contains(&"claude-md".to_string()));
        assert_eq!(memory.importance, 0.8);
    }

    #[test]
    fn test_section_to_memory() {
        let engine = ProjectContextEngine::new();
        let file = DiscoveredFile {
            path: PathBuf::from("/project/CLAUDE.md"),
            filename: "CLAUDE.md".to_string(),
            size: 100,
            content: "# Test".to_string(),
            file_type: InstructionFileType::ClaudeMd,
            format: FileFormat::Markdown,
            content_hash: "sha256:abc".to_string(),
            mtime: SystemTime::UNIX_EPOCH,
            project_path: PathBuf::from("/project"),
        };
        let section = ParsedSection {
            title: "Guidelines".to_string(),
            content: "Follow these rules".to_string(),
            section_path: "Main > Guidelines".to_string(),
            section_index: 1,
            heading_level: 2,
            heading_anchor: "guidelines".to_string(),
            content_hash: "sha256:def".to_string(),
        };
        // Fixed: the original source contained the mis-encoded token
        // `§ion` (an HTML-entity corruption of `&section`), which does
        // not compile.
        let memory = engine.section_to_memory(&section, &file, 123);
        assert!(memory.content.contains("# Guidelines"));
        assert!(memory.tags.contains(&"section".to_string()));
        assert_eq!(
            memory.metadata.get("parent_memory_id"),
            Some(&serde_json::Value::Number(123.into()))
        );
    }
}