use crate::models::{Memory, MemoryKind, MemoryKindAutoClassify};
/// Maximum number of bytes of `title + " " + content` inspected by [`classify_by_regex`].
const CLASSIFY_SCAN_LIMIT: usize = 4096;

/// Keyword rules in priority order: the first kind with any needle present wins.
///
/// Needles are lowercase because they are matched against an ASCII-lowercased haystack.
const KEYWORD_RULES: &[(MemoryKind, &[&str])] = &[
    (
        MemoryKind::Decision,
        &[
            "decided to ",
            "we will ",
            "i will ",
            "chose to ",
            "approved the ",
            "rejecting the ",
            "decision: ",
        ],
    ),
    (
        MemoryKind::Event,
        &[
            "happened on ",
            "happened at ",
            "occurred on ",
            "occurred at ",
            "deployed at ",
            "incident at ",
            "event: ",
            "at 09:",
            "at 10:",
            "at 11:",
            "at 12:",
            "at 13:",
            "at 14:",
            "at 15:",
            "at 16:",
            "at 17:",
            "at 18:",
            "at 19:",
            "at 20:",
        ],
    ),
    (
        MemoryKind::Relation,
        &[
            " depends on ",
            " derives from ",
            " is part of ",
            " contains ",
            " contradicts ",
            " supersedes ",
            " relates to ",
        ],
    ),
    (
        MemoryKind::Entity,
        &[
            " is a person",
            " is an organisation",
            " is a product",
            " is a service",
            " is a system",
            " is a team",
            "person: ",
            "org: ",
            "entity: ",
        ],
    ),
    (
        MemoryKind::Claim,
        &[
            "claim: ",
            "we claim ",
            "i claim ",
            "asserts that ",
            "states that ",
            "is true that ",
            "is false that ",
        ],
    ),
    (
        MemoryKind::Concept,
        &[
            "is_a ",
            "is defined as ",
            "concept of ",
            "definition: ",
            "by definition ",
            "refers to ",
            "is the name of ",
        ],
    ),
];

/// Returns the longest prefix of `s` that is at most `max_bytes` long and ends on a
/// UTF-8 character boundary.
fn utf8_prefix(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    // Offset 0 is always a boundary, so this walk-back terminates.
    let mut end = max_bytes;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}

/// Classifies a memory by scanning its title and content for fixed keyword markers.
///
/// The haystack is `title`, a single space, then `content`, clipped to at most
/// [`CLASSIFY_SCAN_LIMIT`] bytes. The clip always lands on a character boundary, so
/// multi-byte text never causes a panic; text past the limit is ignored.
///
/// Checks run in this order and the first hit wins:
/// 1. a speaker tag (checked on the original-case text) yields `Conversation`;
/// 2. the keyword rules, matched against the ASCII-lowercased text, in the order
///    `Decision`, `Event`, `Relation`, `Entity`, `Claim`, `Concept`.
///
/// Returns `None` when nothing matches.
#[must_use]
pub fn classify_by_regex(title: &str, content: &str) -> Option<MemoryKind> {
    let wanted = title.len() + content.len() + 1;
    let mut hay = String::with_capacity(wanted.min(CLASSIFY_SCAN_LIMIT));
    hay.push_str(utf8_prefix(title, CLASSIFY_SCAN_LIMIT));
    // The separator and content only fit when the whole title is below the limit.
    if title.len() < CLASSIFY_SCAN_LIMIT {
        hay.push(' ');
        hay.push_str(utf8_prefix(content, CLASSIFY_SCAN_LIMIT - title.len() - 1));
    }

    if has_speaker_tag(&hay) {
        return Some(MemoryKind::Conversation);
    }

    // The original-case text is no longer needed, so fold case in place.
    hay.make_ascii_lowercase();
    KEYWORD_RULES
        .iter()
        .find(|(_, needles)| contains_any(&hay, needles))
        .map(|&(kind, _)| kind)
}
/// Reports whether any line of `hay` looks like a line of dialogue.
///
/// After leading whitespace is trimmed, a line qualifies when either:
/// - the text before its first `:` is a label of 1 to 32 bytes that starts with an
///   ASCII uppercase letter and contains only ASCII alphanumerics, `_` or `-`
///   (for example `Alice:`; note that any such label matches, so `Decision:` or
///   `Note:` at the start of a line also count), or
/// - the line contains ` said `, ` says ` or ` replied `, compared ASCII
///   case-insensitively and with a space on both sides.
fn has_speaker_tag(hay: &str) -> bool {
    const SPEECH_VERBS: [&str; 3] = [" said ", " says ", " replied "];

    hay.lines().map(str::trim_start).any(|line| {
        let labelled = line.split_once(':').is_some_and(|(label, _)| {
            let mut rest = label.chars();
            // An empty label fails on `next()`; an uppercase ASCII first character is
            // already alphanumeric, so only the remaining characters need checking.
            label.len() <= 32
                && rest.next().is_some_and(|first| first.is_ascii_uppercase())
                && rest.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
        });
        if labelled {
            return true;
        }

        let folded = line.to_ascii_lowercase();
        SPEECH_VERBS.iter().any(|verb| folded.contains(verb))
    })
}
/// Returns `true` as soon as `hay` contains one of `needles` as a substring.
///
/// An empty `needles` slice yields `false`; an empty needle matches any haystack.
fn contains_any(hay: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if hay.contains(*needle) {
            return true;
        }
    }
    false
}
/// Upgrades an `Observation` memory to a more specific kind when the policy allows it.
///
/// - A memory whose kind is already something other than `Observation` is left alone.
/// - A missing `policy` falls back to the policy type's `Default`; `Off` changes nothing.
/// - Otherwise the keyword classifier runs first, and under `RegexThenLlm` the LLM
///   shim is consulted only if the keyword classifier found nothing.
///
/// On a successful classification `mem.memory_kind` is overwritten. The return value
/// is always the kind the memory holds when the function exits.
pub fn maybe_auto_classify(mem: &mut Memory, policy: Option<MemoryKindAutoClassify>) -> MemoryKind {
    // A kind supplied by the caller always wins over auto-classification.
    if mem.memory_kind != MemoryKind::Observation {
        return mem.memory_kind;
    }

    let llm_fallback = match policy.unwrap_or_default() {
        MemoryKindAutoClassify::Off => return mem.memory_kind,
        MemoryKindAutoClassify::RegexThenLlm => true,
        _ => false,
    };

    let verdict = classify_by_regex(&mem.title, &mem.content).or_else(|| {
        if llm_fallback {
            llm_classify_shim(&mem.title, &mem.content)
        } else {
            None
        }
    });

    if let Some(kind) = verdict {
        mem.memory_kind = kind;
    }
    mem.memory_kind
}
/// Placeholder for an LLM-backed classifier.
///
/// Both arguments are ignored. The function emits a debug-level trace noting that no
/// LLM classifier is wired in and always returns `None`, so callers keep whatever the
/// keyword pass decided.
fn llm_classify_shim(_title: &str, _content: &str) -> Option<MemoryKind> {
    tracing::debug!(
        "auto_classify_kind: RegexThenLlm requested but no LLM classifier wired; falling back \
         to RegexOnly semantics"
    );
    None
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Memory;

    /// Builds an otherwise-default memory carrying the given title and content.
    fn fresh_mem(title: &str, content: &str) -> Memory {
        Memory {
            title: title.to_owned(),
            content: content.to_owned(),
            ..Memory::default()
        }
    }

    /// Runs auto-classification under the `RegexOnly` policy and returns the verdict.
    fn regex_only(title: &str, content: &str) -> MemoryKind {
        let mut mem = fresh_mem(title, content);
        maybe_auto_classify(&mut mem, Some(MemoryKindAutoClassify::RegexOnly))
    }

    // --- Policy gating -------------------------------------------------------

    #[test]
    fn off_policy_is_noop() {
        let mut mem = fresh_mem("X depends on Y", "");
        let verdict = maybe_auto_classify(&mut mem, Some(MemoryKindAutoClassify::Off));
        assert_eq!(verdict, MemoryKind::Observation);
        assert_eq!(mem.memory_kind, MemoryKind::Observation);
    }

    #[test]
    fn none_policy_is_noop() {
        let mut mem = fresh_mem("X depends on Y", "");
        assert_eq!(
            maybe_auto_classify(&mut mem, None),
            MemoryKind::Observation
        );
    }

    #[test]
    fn caller_supplied_kind_wins() {
        let mut mem = fresh_mem("X depends on Y", "");
        mem.memory_kind = MemoryKind::Claim;
        let verdict = maybe_auto_classify(&mut mem, Some(MemoryKindAutoClassify::RegexOnly));
        assert_eq!(verdict, MemoryKind::Claim);
        assert_eq!(mem.memory_kind, MemoryKind::Claim);
    }

    // --- One positive case per keyword family --------------------------------

    #[test]
    fn relation_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only("subsystem A", "A depends on B for token expiry"),
            MemoryKind::Relation
        );
    }

    #[test]
    fn event_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only("deploy", "The cutover happened at 14:32 UTC"),
            MemoryKind::Event
        );
    }

    #[test]
    fn conversation_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only("chat", "Alice: should we deploy?\nBob: yes"),
            MemoryKind::Conversation
        );
    }

    #[test]
    fn concept_pattern_fires_on_is_a_marker() {
        assert_eq!(
            regex_only("ownership", "ownership is_a Rust borrow-checker rule"),
            MemoryKind::Concept
        );
    }

    #[test]
    fn decision_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only("api migration", "We decided to deprecate v1 by Q3"),
            MemoryKind::Decision
        );
    }

    #[test]
    fn entity_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only(
                "acme corp",
                "Acme corp is a service provider in our supply chain",
            ),
            MemoryKind::Entity
        );
    }

    #[test]
    fn claim_pattern_fires_under_regex_only() {
        assert_eq!(
            regex_only(
                "posture",
                "We claim that the GC scheduler is starvation-free",
            ),
            MemoryKind::Claim
        );
    }

    // --- Misses and fallbacks ------------------------------------------------

    #[test]
    fn regex_miss_keeps_observation() {
        assert_eq!(
            regex_only("note", "just a stray thought without taxonomic signal"),
            MemoryKind::Observation
        );
    }

    #[test]
    fn regex_then_llm_degrades_to_regex_only_when_no_llm_wired() {
        let mut mem = fresh_mem("inscrutable", "lorem ipsum dolor sit amet");
        let verdict = maybe_auto_classify(&mut mem, Some(MemoryKindAutoClassify::RegexThenLlm));
        assert_eq!(verdict, MemoryKind::Observation);
    }
}