use crate::memory::types::MemoryType;
/// Phrases that mark a statement as a correction of something said earlier.
///
/// Every entry is lowercase and is matched as a plain substring of the
/// lowercased content, so a short stem also covers every longer phrase that
/// contains it. Entries that were fully covered by a shorter one have been
/// dropped because they could never change the outcome.
const CORRECTION_PATTERNS: &[&str] = &[
    // English
    "actually",
    "no, it's",
    "that's wrong",
    "correction",
    "i meant",
    "not that",
    "i was wrong",
    // Korean
    "아니라",
    "실제로는",
    "정정",
    // Also covers longer phrases built on this stem, e.g. "내가 틀렸".
    "틀렸",
    "잘못 알고",
    "고쳐",
];
/// Phrases that signal a stated preference.
///
/// Every entry is lowercase and is matched as a plain substring of the
/// lowercased content, so a short stem also covers every longer phrase that
/// contains it. Entries that were fully covered by a shorter one have been
/// dropped because they could never change the outcome.
const PREFERENCE_PATTERNS: &[&str] = &[
    // English
    "i prefer",
    "always use",
    "i like",
    "i don't",
    "never use",
    "i'd rather",
    "my preference",
    "please use",
    "make sure to use",
    // Korean
    "선호",
    // Also covers longer phrases built on this stem, e.g. "내가 좋아하".
    "좋아",
    "항상 써",
    "절대 쓰지",
    "차라리",
    "내 취향",
];
/// Phrases that signal a decision that has been made.
///
/// Every entry is lowercase and is matched as a plain substring of the
/// lowercased content, so a short stem also covers every longer phrase that
/// contains it. Entries that were fully covered by a shorter one have been
/// dropped because they could never change the outcome.
const DECISION_PATTERNS: &[&str] = &[
    // English
    "decided to",
    "we chose",
    "let's go with",
    "we'll use",
    "i decided",
    "the decision is",
    "going with",
    // Korean
    // Also covers longer forms built on this stem, e.g. "결정했어".
    "결정했",
    "하기로 했",
    "선택했",
    // NOTE(review): this is a bare subject ("we") rather than a decision
    // phrase, so it matches far more text than the English entries do —
    // confirm that this breadth is intended.
    "우리는",
    "사용하기로",
];
/// Phrases that describe a repeatable procedure or workflow.
///
/// Entries are lowercase and are matched as literal substrings of the
/// lowercased content. Multi-word entries such as `"first, then"` therefore
/// only match when the words appear exactly as written, with nothing in
/// between.
const SKILL_PATTERNS: &[&str] = &[
    // --- English ---
    "always run",
    "before commit",
    "every time",
    // The longer "make sure to use" lives in the preference table, which is
    // consulted before this one.
    "make sure to",
    "workflow is",
    "standard procedure",
    "first, then",
    "step by step",
    // --- Korean ---
    "하는 방법",
    "이렇게 하는",
    "항상 실행",
    "커밋하기 전",
    "표준 절차",
    "순서대로",
    "먼저 그리고",
];
/// Phrases in which the speaker describes themselves (name, job, background).
///
/// Entries are lowercase and are matched as literal substrings of the
/// lowercased content. The trailing space on some entries is part of the
/// pattern: it requires a following word boundary, so `"i'm a "` does not
/// match text like "i'm available".
const PROFILE_PATTERNS: &[&str] = &[
    // --- English ---
    "my name is",
    "i work at",
    "i'm a ",
    "i am a ",
    "my role is",
    "my job is",
    "i specialize",
    "my background",
    // --- Korean ---
    "내 이름은",
    "제 이름은",
    // NOTE(review): the next two are bare first-person subjects, so they
    // match most sentences that start with "I" — confirm this is intended.
    "나는 ",
    "저는 ",
    "직업은",
    "일하고 있",
    "전문",
    "내 배경",
];
/// Past-tense verbs that indicate something happened (an event or milestone).
///
/// Entries are lowercase and are matched as literal substrings of the
/// lowercased content. The Korean entries are past-tense stems mirroring the
/// English verbs one for one, so this table covers the same two languages as
/// the other pattern tables in this file.
const EPISODE_PATTERNS: &[&str] = &[
    // English
    "deployed",
    "released",
    "launched",
    "completed",
    "finished",
    "started",
    // Korean (same order as the English verbs above)
    "배포했",
    "출시했",
    "론칭했",
    "완료했",
    "끝냈",
    "시작했",
];
/// Keyword-driven heuristics for classifying and tagging free-form memory text.
///
/// The type carries no state; all functionality is exposed as associated
/// functions.
pub struct AutoClassifier;

impl AutoClassifier {
    /// Guesses the [`MemoryType`] of `content` from trigger phrases.
    ///
    /// The content is lowercased and typographic apostrophes are folded to
    /// `'` before matching, so `"I’m a developer"` is treated exactly like
    /// `"I'm a developer"`. The pattern tables are then consulted in a fixed
    /// order and the first hit wins:
    ///
    /// 1. correction → `Fact`
    /// 2. preference → `Preference`
    /// 3. decision → `Decision`
    /// 4. skill → `Skill`
    /// 5. profile → `UserProfile`
    /// 6. episode → `Episode`
    ///
    /// Content that matches nothing is classified as `Fact`.
    ///
    /// `_context` is accepted for interface compatibility but is not used.
    pub fn infer_memory_type(content: &str, _context: &str) -> MemoryType {
        let text = Self::normalize(content);

        if Self::is_correction(&text) {
            // A correction is stored as a fact. It is checked first so that a
            // correcting statement is not filed under a later category.
            MemoryType::Fact
        } else if Self::is_preference(&text) {
            MemoryType::Preference
        } else if Self::is_decision(&text) {
            MemoryType::Decision
        } else if Self::is_skill(&text) {
            MemoryType::Skill
        } else if Self::is_profile(&text) {
            MemoryType::UserProfile
        } else if Self::is_episode(&text) {
            MemoryType::Episode
        } else {
            MemoryType::Fact
        }
    }

    /// Prepares raw content for pattern matching.
    ///
    /// Lowercases the text and replaces apostrophe look-alikes (U+2018,
    /// U+2019, U+02BC) with the ASCII apostrophe used by the pattern tables.
    /// Without this, contractions typed with "smart quotes" never match.
    fn normalize(content: &str) -> String {
        content
            .to_lowercase()
            .replace(|c: char| matches!(c, '\u{2018}' | '\u{2019}' | '\u{02BC}'), "'")
    }

    /// Returns `true` if `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    /// Whether the already-lowercased text contains a correction phrase.
    fn is_correction(content_lower: &str) -> bool {
        Self::contains_any(content_lower, CORRECTION_PATTERNS)
    }

    /// Whether the already-lowercased text contains a preference phrase.
    fn is_preference(content_lower: &str) -> bool {
        Self::contains_any(content_lower, PREFERENCE_PATTERNS)
    }

    /// Whether the already-lowercased text contains a decision phrase.
    fn is_decision(content_lower: &str) -> bool {
        Self::contains_any(content_lower, DECISION_PATTERNS)
    }

    /// Whether the already-lowercased text contains a skill/workflow phrase.
    fn is_skill(content_lower: &str) -> bool {
        Self::contains_any(content_lower, SKILL_PATTERNS)
    }

    /// Whether the already-lowercased text contains a self-description phrase.
    fn is_profile(content_lower: &str) -> bool {
        Self::contains_any(content_lower, PROFILE_PATTERNS)
    }

    /// Whether the already-lowercased text contains an event verb.
    fn is_episode(content_lower: &str) -> bool {
        Self::contains_any(content_lower, EPISODE_PATTERNS)
    }

    /// Extracts up to `max_tags` keyword tags from `content`.
    ///
    /// Each whitespace-separated token is stripped of leading and trailing
    /// ASCII punctuation and lowercased. A token is kept when it is longer
    /// than three bytes and is not a stop word. Length is measured in UTF-8
    /// bytes, so a non-ASCII word can qualify with fewer than four characters
    /// (two Hangul syllables are six bytes).
    ///
    /// Tags are returned most frequent first; ties are broken alphabetically,
    /// which makes the result deterministic. Passing `max_tags == 0` yields an
    /// empty vector.
    pub fn extract_tags(content: &str, max_tags: usize) -> Vec<String> {
        use std::collections::HashMap;

        let mut frequency: HashMap<String, u32> = HashMap::new();
        let words = content
            .split_whitespace()
            .map(|token| {
                token
                    .trim_matches(|c: char| c.is_ascii_punctuation())
                    .to_lowercase()
            })
            .filter(|word| word.len() > 3 && !Self::is_stop_word(word));
        for word in words {
            *frequency.entry(word).or_default() += 1;
        }

        let mut ranked: Vec<(String, u32)> = frequency.into_iter().collect();
        // Words are unique map keys, so the comparator is a total order and an
        // unstable sort gives the same result as a stable one.
        ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        ranked.truncate(max_tags);
        ranked.into_iter().map(|(word, _)| word).collect()
    }

    /// Whether `word` (expected lowercase) is a common English filler word
    /// that should never become a tag.
    fn is_stop_word(word: &str) -> bool {
        matches!(
            word,
            "that"
                | "this"
                | "with"
                | "from"
                | "have"
                | "been"
                | "were"
                | "will"
                | "would"
                | "could"
                | "should"
                | "about"
                | "which"
                | "their"
                | "there"
                | "these"
                | "those"
                | "other"
                | "than"
                | "then"
                | "also"
                | "some"
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` is classified as `expected`, naming the input on
    /// failure so a broken case is easy to spot.
    fn assert_classified(input: &str, expected: MemoryType) {
        assert_eq!(
            AutoClassifier::infer_memory_type(input, ""),
            expected,
            "unexpected classification for {:?}",
            input
        );
    }

    #[test]
    fn test_classify_correction() {
        assert_classified("Actually, the port is 8080 not 3000", MemoryType::Fact);
        assert_classified("Correction: the API key expired", MemoryType::Fact);
    }

    /// Corrections map to `Fact`, the same value as the fallback, so the only
    /// observable effect of the correction check is that it wins over the
    /// categories tested after it.
    #[test]
    fn test_correction_takes_precedence() {
        assert_classified("Actually, I prefer tabs", MemoryType::Fact);
        assert_classified("실제로는 탭을 선호해", MemoryType::Fact);
    }

    #[test]
    fn test_classify_preference() {
        assert_classified("I prefer dark mode for the editor", MemoryType::Preference);
        assert_classified("Never use tabs, always use spaces", MemoryType::Preference);
    }

    #[test]
    fn test_classify_decision() {
        assert_classified(
            "We decided to use Tokio for async runtime",
            MemoryType::Decision,
        );
        assert_classified(
            "Let's go with the microservice approach",
            MemoryType::Decision,
        );
    }

    #[test]
    fn test_classify_skill() {
        assert_classified("Always run tests before commit", MemoryType::Skill);
        assert_classified(
            "Standard procedure: lint, test, then deploy",
            MemoryType::Skill,
        );
    }

    #[test]
    fn test_classify_profile() {
        assert_classified(
            "My name is Won and I work at Oxios",
            MemoryType::UserProfile,
        );
        assert_classified("I'm a backend engineer", MemoryType::UserProfile);
    }

    #[test]
    fn test_classify_episode() {
        assert_classified(
            "Released v0.2.0 with memory consolidation",
            MemoryType::Episode,
        );
        assert_classified(
            "Deployed the new API gateway yesterday",
            MemoryType::Episode,
        );
    }

    #[test]
    fn test_classify_default_fact() {
        assert_classified("API uses port 3000", MemoryType::Fact);
        assert_classified("The database has 42 tables", MemoryType::Fact);
    }

    /// The Korean entries in the pattern tables are matched the same way as
    /// the English ones.
    #[test]
    fn test_classify_korean_patterns() {
        assert_classified("다크 모드를 선호해", MemoryType::Preference);
        assert_classified("Tokio를 쓰기로 결정했어", MemoryType::Decision);
        assert_classified("커밋하기 전에 항상 테스트를 돌려", MemoryType::Skill);
        assert_classified(
            "제 이름은 원이고 백엔드 개발자입니다",
            MemoryType::UserProfile,
        );
    }

    #[test]
    fn test_extract_tags() {
        // Every word occurs once, so the order is purely alphabetical and the
        // limit of five drops "system" and "tokio".
        let tags =
            AutoClassifier::extract_tags("Rust tokio async runtime memory consolidation system", 5);
        assert_eq!(
            tags,
            vec!["async", "consolidation", "memory", "runtime", "rust"]
        );
    }

    #[test]
    fn test_extract_tags_filters_and_ranks() {
        // "that" is a stop word, "the"/"cat" are too short, punctuation is
        // trimmed, and the repeated word is ranked first.
        assert_eq!(
            AutoClassifier::extract_tags("that THAT tokio, Tokio! the cat rust", 10),
            vec!["tokio", "rust"]
        );
        assert!(AutoClassifier::extract_tags("memory memory", 0).is_empty());
        assert!(AutoClassifier::extract_tags("", 3).is_empty());
    }
}