use regex::Regex;
use std::sync::LazyLock;
/// Matches the two kinds of smos markup that are removed wholesale:
///
/// * an HTML comment of the form `<!-- smos:TOKEN -->`, where `TOKEN` is a run of
///   non-whitespace characters and the padding whitespace is optional;
/// * a `<smos-memory ...>...</smos-memory>` element, attributes included. The inline
///   `(?s)` flag lets `.` cross newlines so multi-line bodies match, and the lazy
///   `.*?` stops at the first closing tag.
///
/// Compiled once, on first use.
static MARKERS_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"<!--\s*smos:\S+?\s*-->|(?s)<smos-memory[^>]*>.*?</smos-memory>";
    // The pattern is a fixed literal, so a compile failure is a programming error.
    Regex::new(pattern).expect("markers regex literal")
});
/// Matches a free-standing `sess_<id>` token, where `<id>` is one or more ASCII
/// letters, digits or underscores.
///
/// The token must sit at the very start of the text or directly after a character
/// that is not an ASCII letter, digit or underscore, so `obsess_token` is not a
/// match. That preceding character (or the empty string at the start of the text)
/// is captured as group 1 so a replacement can put it back.
///
/// Compiled once, on first use.
static BARE_SESS_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(^|[^A-Za-z0-9_])sess_[A-Za-z0-9_]+";
    // The pattern is a fixed literal, so a compile failure is a programming error.
    Regex::new(pattern).expect("bare sess regex literal")
});
/// Strips smos bookkeeping noise from `content` and returns what is left, trimmed.
///
/// Two passes run in order:
///
/// 1. every match of `MARKERS_RE` is deleted;
/// 2. every match of `BARE_SESS_RE` is replaced by its first capture group, which
///    drops the `sess_...` token but keeps the character that preceded it.
///
/// Whitespace next to a removed span is left exactly as it was; only the leading
/// and trailing whitespace of the final result is trimmed.
pub fn clean(content: &str) -> String {
    let marker_free = MARKERS_RE.replace_all(content, "");
    // `${1}` re-inserts the boundary character consumed by the match.
    let id_free = BARE_SESS_RE.replace_all(&marker_free, "${1}");
    String::from(id_free.trim())
}
/// Unit tests for `clean`.
///
/// `clean` never collapses interior whitespace: when a marker or a bare id is
/// removed, the whitespace that stood on either side of it stays put. Expected
/// strings below therefore contain double spaces where a span was cut out
/// mid-sentence.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strips_session_marker_comment() {
        let input = "hello\n<!-- smos:sess_abcdef012345 -->";
        assert_eq!(clean(input), "hello");
    }

    #[test]
    fn strips_multiline_smos_memory_block() {
        let input = "before\n<smos-memory session=\"sess_x\">\n[fact_1] doc\n</smos-memory>\nafter";
        let out = clean(input);
        assert!(out.contains("before"));
        assert!(out.contains("after"));
        assert!(!out.contains("smos-memory"));
        assert!(!out.contains("fact_1"));
    }

    #[test]
    fn strips_smos_memory_block_with_attributes() {
        let input = "<smos-memory session=\"sess_y\" extra=\"value\">body</smos-memory>tail";
        let out = clean(input);
        assert_eq!(out, "tail");
    }

    #[test]
    fn strips_bare_session_id_preserving_surrounding_text() {
        let input = "the session id is sess_abcdef012345 here";
        // The space before the id is restored via `${1}` and the space after it was
        // never part of the match, so two spaces remain where the id used to be.
        assert_eq!(clean(input), "the session id is  here");
    }

    #[test]
    fn preserves_session_id_embedded_in_a_word() {
        let input = "obsess_token must survive";
        assert_eq!(clean(input), "obsess_token must survive");
    }

    #[test]
    fn preserves_normal_content_without_noise() {
        let input = "Just a regular fact about Rust and cargo.";
        assert_eq!(clean(input), input);
    }

    #[test]
    fn empty_input_yields_empty_output() {
        assert_eq!(clean(""), "");
        assert_eq!(clean(" "), "");
        assert_eq!(clean("  \n\t "), "");
    }

    #[test]
    fn strips_bare_id_at_start_of_text() {
        let input = "sess_aabbccddeeff is the id";
        // The leading space left behind by the removed id is trimmed away.
        assert_eq!(clean(input), "is the id");
    }

    #[test]
    fn strips_multiple_distinct_noise_patterns_in_one_pass() {
        let input = "marker <!-- smos:sess_1 --> bare sess_aabbccddeeff block <smos-memory session=\"s\">x</smos-memory>";
        let out = clean(input);
        // Each removed span leaves its neighbouring spaces behind; only the trailing
        // space after "block" is trimmed.
        assert_eq!(out, "marker  bare  block");
    }
}