Skip to main content

lean_ctx/core/sanitize.rs

/// Maximum number of characters `neutralize_metadata` keeps before truncating.
const MAX_METADATA_LEN: usize = 200;

/// Returns a cleaned, length-limited copy of `input`.
///
/// The transformation, applied per character:
/// - characters below U+0020 are dropped, except `\n`, `\t` and `\r`;
/// - `<` becomes `‹`, `>` becomes `›`, and a backtick becomes `'`;
/// - every other character is kept unchanged.
///
/// At most `MAX_METADATA_LEN` kept characters are emitted. A single `…` is
/// appended only when at least one more *kept* character follows the limit,
/// so trailing control characters (which would have been dropped anyway) do
/// not produce a misleading truncation marker.
///
/// NOTE(review): judging by the name, the substitutions are presumably meant
/// to stop metadata from being read as markup or inline code downstream —
/// confirm against the callers.
pub fn neutralize_metadata(input: &str) -> String {
    // Filter and map lazily so the length limit counts emitted characters only.
    let mut kept = input
        .chars()
        .filter(|&ch| ch >= ' ' || matches!(ch, '\n' | '\t' | '\r'))
        .map(|ch| match ch {
            '<' => '‹',
            '>' => '›',
            '`' => '\'',
            other => other,
        });

    let mut out = String::with_capacity(input.len().min(MAX_METADATA_LEN));
    out.extend(kept.by_ref().take(MAX_METADATA_LEN));

    // `take` never pulls past the limit, so a remaining item means real
    // content was cut off.
    if kept.next().is_some() {
        out.push('…');
    }
    out
}
24
/// Returns a copy of `input` with characters below U+0020 removed, except
/// for `\n`, `\t` and `\r`, which are preserved.
///
/// All other characters — including U+007F and everything above it — pass
/// through unchanged, and no length limit is applied.
pub fn neutralize_shell_content(input: &str) -> String {
    // Stream the characters straight into the output; no intermediate
    // `Vec<char>` is needed because each character is inspected only once.
    let mut out = String::with_capacity(input.len());
    out.extend(
        input
            .chars()
            .filter(|&ch| ch >= ' ' || matches!(ch, '\n' | '\t' | '\r')),
    );
    out
}
40
/// Encodes `bytes` as lowercase hexadecimal, two digits per byte, high
/// nibble first.
fn to_hex(bytes: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    let mut encoded = String::with_capacity(bytes.len() * 2);
    encoded.extend(
        bytes
            .iter()
            .flat_map(|&byte| [byte >> 4, byte & 0x0f])
            .map(|nibble| DIGITS[usize::from(nibble)]),
    );
    encoded
}
50
/// Normalizes `label` for use inside a fence marker.
///
/// ASCII letters and digits are kept (letters upper-cased), `_` and `-` both
/// become `_`, and every other character is discarded. If nothing survives,
/// the literal `"BLOCK"` is returned instead.
fn safe_label(label: &str) -> String {
    let cleaned: String = label
        .chars()
        .filter_map(|ch| match ch {
            '_' | '-' => Some('_'),
            c if c.is_ascii_alphanumeric() => Some(c.to_ascii_uppercase()),
            _ => None,
        })
        .collect();

    if cleaned.is_empty() {
        return "BLOCK".to_string();
    }
    cleaned
}
66
67pub fn fence_content(label: &str, content: &str) -> String {
68    let label = safe_label(label);
69    let mut bytes = [0u8; 16];
70    let _ = getrandom::fill(&mut bytes);
71    let token = to_hex(&bytes);
72    let marker = format!("LCTX_{label}_{token}");
73    format!("‹‹‹{marker}›››\n{content}\n‹‹‹{marker}›››")
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// Angle brackets and backticks are replaced character-for-character.
    #[test]
    fn neutralize_replaces_angle_and_backticks() {
        let out = neutralize_metadata("<tag>`code`</tag>");
        assert_eq!(out, "‹tag›'code'‹/tag›");
    }

    /// Input within the limit is returned without a truncation marker.
    #[test]
    fn neutralize_keeps_short_input_untruncated() {
        let exact = "a".repeat(MAX_METADATA_LEN);
        assert_eq!(neutralize_metadata(&exact), exact);
    }

    /// Input beyond the limit is cut and terminated with a single ellipsis.
    #[test]
    fn neutralize_truncates_with_ellipsis() {
        let long = "a".repeat(MAX_METADATA_LEN + 50);
        let out = neutralize_metadata(&long);
        assert_eq!(out.chars().count(), MAX_METADATA_LEN + 1);
        assert!(out.ends_with('…'));
    }

    /// Control characters are removed while newline, tab and CR survive.
    #[test]
    fn shell_content_strips_control_chars() {
        let out = neutralize_shell_content("a\u{1b}[0mb\u{7}\tc\r\n");
        assert_eq!(out, "a[0mb\tc\r\n");
    }

    #[test]
    fn to_hex_is_lowercase_two_digits_per_byte() {
        assert_eq!(to_hex(&[]), "");
        assert_eq!(to_hex(&[0x00, 0x0f, 0xab, 0xff]), "000fabff");
    }

    #[test]
    fn safe_label_normalizes_or_falls_back() {
        assert_eq!(safe_label("my-label_1"), "MY_LABEL_1");
        assert_eq!(safe_label("a b<c>"), "ABC");
        assert_eq!(safe_label("!!!"), "BLOCK");
        assert_eq!(safe_label(""), "BLOCK");
    }

    /// Opening and closing markers match and the content sits between them.
    #[test]
    fn fence_wraps_symmetrically() {
        let out = fence_content("knowledge", "hello");
        let lines: Vec<&str> = out.lines().collect();
        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0], lines[2]);
        assert_eq!(lines[1], "hello");
        assert!(lines[0].starts_with("‹‹‹LCTX_KNOWLEDGE_"));
        assert!(lines[0].ends_with("›››"));
    }
}