1use std::collections::HashMap;
32
33use regex::{Regex, RegexBuilder};
34
/// Tunable knobs for a [`SessionSymbolTable`].
///
/// The type is `Copy`, so it is passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct SessionPolicy {
    /// Occurrence count a candidate term must reach before it is promoted
    /// to a binding.
    pub min_occurrences: u32,
    /// Upper bound on the number of bindings kept after each observation.
    pub max_active_bindings: usize,
    /// Factor every candidate's score is multiplied by at the start of each
    /// observation.
    pub turn_decay: f32,
    /// Minimum size of an extracted term: words for a capitalised phrase,
    /// `/`-separated segments for a path.
    pub min_term_words: usize,
}

impl Default for SessionPolicy {
    /// Returns the stock policy: promote on the second occurrence, keep at
    /// most 64 bindings, decay scores by 0.85 per turn, and require terms of
    /// at least two words.
    fn default() -> Self {
        SessionPolicy {
            min_term_words: 2,
            turn_decay: 0.85,
            max_active_bindings: 64,
            min_occurrences: 2,
        }
    }
}
68
/// One promoted term together with the short alias that stands in for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Binding {
    /// Short replacement token; generated as `s<N>` when the term is promoted.
    pub alias: String,
    /// The term the alias stands for, spelled as it was extracted at the
    /// moment of promotion.
    pub term: String,
}
79
/// Bookkeeping for a term that has been seen but is not necessarily bound.
#[derive(Debug, Default)]
struct CandidateState {
    /// How many times the term has been extracted across all observations.
    occurrences: u32,
    /// Running score: decayed on every observation and bumped by the term's
    /// word count each time it is seen.
    /// NOTE(review): nothing in the visible code reads this value — confirm
    /// whether a consumer exists elsewhere.
    score: f32,
}
85
/// Per-session table that tracks recurring terms and binds the frequent ones
/// to short aliases.
#[derive(Debug)]
pub struct SessionSymbolTable {
    /// Thresholds and limits governing promotion and eviction.
    policy: SessionPolicy,
    /// Candidate statistics keyed by the ASCII-lowercased term.
    candidates: HashMap<String, CandidateState>,
    /// Active bindings in promotion order (oldest first).
    bindings: Vec<Binding>,
    /// Numeric suffix that the next generated alias will use.
    next_id: u32,
}
97
98impl SessionSymbolTable {
99 #[must_use]
101 pub fn new(policy: SessionPolicy) -> Self {
102 Self {
103 policy,
104 candidates: HashMap::new(),
105 bindings: Vec::new(),
106 next_id: 0,
107 }
108 }
109
110 #[must_use]
112 pub fn bindings(&self) -> &[Binding] {
113 &self.bindings
114 }
115
116 #[must_use]
122 pub fn prefix(&self) -> String {
123 if self.bindings.is_empty() {
124 return String::new();
125 }
126 let mut out = String::with_capacity(self.bindings.len() * 24);
127 for b in &self.bindings {
128 out.push_str(&b.alias);
129 out.push('=');
130 out.push_str(&b.term);
131 out.push('\n');
132 }
133 out
134 }
135
136 pub fn observe(&mut self, text: &str) {
140 for state in self.candidates.values_mut() {
143 state.score *= self.policy.turn_decay;
144 }
145 for term in extract_candidate_terms(text, self.policy.min_term_words) {
146 let key = term.to_ascii_lowercase();
147 let state = self.candidates.entry(key.clone()).or_default();
148 state.occurrences += 1;
149 let len_bonus = u32::try_from(term.split_whitespace().count()).unwrap_or(1) as f32;
152 state.score += len_bonus;
153 if state.occurrences >= self.policy.min_occurrences && !self.has_binding_for(&key) {
154 self.promote(term, key.clone());
155 }
156 }
157 self.enforce_cap();
158 }
159
160 #[must_use]
164 pub fn rewrite(&self, text: &str) -> String {
165 if self.bindings.is_empty() {
166 return text.to_owned();
167 }
168 let mut sorted: Vec<&Binding> = self.bindings.iter().collect();
169 sorted.sort_by_key(|b| std::cmp::Reverse(b.term.len()));
170 let mut out = text.to_owned();
171 for b in sorted {
172 let pat = format!(r"\b{}\b", regex::escape(&b.term));
173 if let Ok(re) = RegexBuilder::new(&pat).case_insensitive(true).build() {
174 out = re.replace_all(&out, b.alias.as_str()).into_owned();
175 }
176 }
177 out
178 }
179
180 pub fn observe_and_rewrite(&mut self, text: &str) -> String {
185 self.observe(text);
186 self.rewrite(text)
187 }
188
189 fn has_binding_for(&self, lowered_term: &str) -> bool {
190 self.bindings
191 .iter()
192 .any(|b| b.term.eq_ignore_ascii_case(lowered_term))
193 }
194
195 fn promote(&mut self, term: String, _key_lower: String) {
196 let alias = format!("s{}", self.next_id);
197 self.next_id += 1;
198 self.bindings.push(Binding { alias, term });
199 }
200
201 fn enforce_cap(&mut self) {
202 if self.bindings.len() <= self.policy.max_active_bindings {
203 return;
204 }
205 let drop = self.bindings.len() - self.policy.max_active_bindings;
210 self.bindings.drain(0..drop);
211 }
212}
213
214impl Default for SessionSymbolTable {
215 fn default() -> Self {
216 Self::new(SessionPolicy::default())
217 }
218}
219
/// Capitalised determiners that may open a matched phrase without being
/// part of the term itself.
const LEADING_ARTICLES: &[&str] = &["The", "A", "An", "This", "That", "These", "Those"];

/// Drops one leading article from `s`, if present.
///
/// The first whitespace-delimited word is compared (case-sensitively) with
/// [`LEADING_ARTICLES`]. On a match the remainder is returned with its
/// leading whitespace removed, so `"The  Auth Module"` and
/// `"The Auth Module"` both yield `"Auth Module"`. When the first word is not
/// an article, or nothing but whitespace follows it, `s` is returned
/// unchanged.
fn strip_leading_article(s: &str) -> &str {
    match s.split_once(char::is_whitespace) {
        Some((first, rest)) => {
            // The article may be followed by several whitespace characters;
            // none of them belong to the term.
            let rest = rest.trim_start();
            if !rest.is_empty() && LEADING_ARTICLES.contains(&first) {
                rest
            } else {
                s
            }
        }
        None => s,
    }
}
239
240fn extract_candidate_terms(text: &str, min_words: usize) -> Vec<String> {
241 let mut out = Vec::new();
242 let cap_phrase = Regex::new(r"\b([A-Z][a-zA-Z0-9_]{2,}(?:\s+[A-Z][a-zA-Z0-9_]{2,}){1,3})\b")
244 .expect("cap-phrase pattern");
245 for m in cap_phrase.find_iter(text) {
246 let raw = m.as_str().trim();
247 let s = strip_leading_article(raw);
251 if s.split_whitespace().count() >= min_words {
252 out.push(s.to_owned());
253 }
254 }
255 let path_re = Regex::new(r"[A-Za-z0-9_\-\.]+(?:/[A-Za-z0-9_\-\.]+){1,}").expect("path pattern");
257 for m in path_re.find_iter(text) {
258 let s = m.as_str().trim_end_matches('.');
259 if min_words <= 1 || s.split('/').count() >= min_words {
263 out.push(s.to_owned());
264 }
265 }
266 out
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds every turn to `table`, in order.
    fn feed(table: &mut SessionSymbolTable, turns: &[&str]) {
        for &turn in turns {
            table.observe(turn);
        }
    }

    /// Builds a default-policy table that has already observed `turns`.
    fn seeded(turns: &[&str]) -> SessionSymbolTable {
        let mut table = SessionSymbolTable::default();
        feed(&mut table, turns);
        table
    }

    /// A single sighting stays below the default promotion threshold.
    #[test]
    fn first_observation_does_not_promote() {
        let table = seeded(&["Authentication Module is the entry point."]);
        assert_eq!(table.bindings().len(), 0);
    }

    /// The second sighting promotes the term, with the article stripped.
    #[test]
    fn second_observation_promotes_to_binding() {
        let table = seeded(&[
            "The Authentication Module starts.",
            "The Authentication Module fires the policy.",
        ]);
        let bound = table.bindings();
        assert_eq!(bound.len(), 1, "{:?}", table.bindings());
        assert_eq!(bound[0].term, "Authentication Module");
    }

    /// Once bound, the term is replaced by its alias.
    #[test]
    fn rewrite_replaces_all_occurrences() {
        let table = seeded(&["Authentication Module v1.", "Authentication Module v2."]);
        let rewritten = table.rewrite("Authentication Module is fine.");
        assert!(rewritten.contains("s0"));
        assert!(!rewritten.contains("Authentication Module"));
    }

    /// Two bindings produce exactly two prefix lines.
    #[test]
    fn prefix_emits_one_line_per_binding() {
        let table = seeded(&[
            "Policy Engine here.",
            "Policy Engine again.",
            "Session Store later.",
            "Session Store again.",
        ]);
        let p = table.prefix();
        assert!(p.contains("=Policy Engine"), "prefix={p}");
        assert!(p.contains("=Session Store"), "prefix={p}");
        assert_eq!(p.lines().count(), 2);
    }

    /// No bindings means no prefix at all.
    #[test]
    fn empty_session_emits_empty_prefix() {
        let table = SessionSymbolTable::default();
        assert_eq!(table.prefix(), "");
    }

    /// Promoting three terms under a cap of two leaves two bindings.
    #[test]
    fn pool_cap_is_enforced() {
        let mut table = SessionSymbolTable::new(SessionPolicy {
            min_occurrences: 2,
            max_active_bindings: 2,
            turn_decay: 1.0,
            min_term_words: 2,
        });
        for term in ["Acme Service", "Beacon Service", "Cinder Service"] {
            let first = format!("{term} first.");
            let second = format!("{term} second.");
            table.observe(&first);
            table.observe(&second);
        }
        assert_eq!(table.bindings().len(), 2);
    }

    /// Slash-separated paths are tracked and rewritten like phrases.
    #[test]
    fn paths_become_candidates() {
        let table = seeded(&["look at src/encoder.rs", "now src/encoder.rs again"]);
        let rewritten = table.rewrite("src/encoder.rs is the file");
        assert!(!rewritten.contains("src/encoder.rs"));
    }

    /// A longer term is substituted before a shorter term it contains.
    #[test]
    fn longer_binding_wins_over_shorter() {
        let table = seeded(&[
            "Authentication Module Plus initial seed.",
            "Authentication Module Plus second seed.",
            "Authentication Module fires.",
            "Authentication Module fires again.",
        ]);
        let out = table.rewrite("Authentication Module Plus is the longer one.");
        assert!(out.contains("s0"), "{out}");
        assert!(!out.contains("Authentication Module Plus"));
    }

    /// The combined call rewrites with bindings created by that same call.
    #[test]
    fn observe_and_rewrite_returns_alias_substituted_text() {
        let mut table = SessionSymbolTable::default();
        let turn = "Policy Engine boot.";
        table.observe_and_rewrite(turn);
        let second_pass = table.observe_and_rewrite(turn);
        assert!(second_pass.contains("s0"));
    }

    /// With nothing bound, rewriting is the identity.
    #[test]
    fn rewrite_unchanged_when_no_bindings() {
        let table = SessionSymbolTable::default();
        let input = "nothing seen yet";
        assert_eq!(table.rewrite(input), input);
    }

    /// Seeing the same term fifty times still yields a single binding.
    #[test]
    fn decay_keeps_state_bounded_under_repeated_observation() {
        let mut table = SessionSymbolTable::new(SessionPolicy {
            min_occurrences: 2,
            max_active_bindings: 4,
            turn_decay: 0.5,
            min_term_words: 2,
        });
        (0..50).for_each(|_| table.observe("Auth Module again."));
        assert_eq!(table.bindings().len(), 1);
    }

    /// After two sightings of a term-dense sentence, the rewritten form is
    /// more than a quarter shorter.
    #[test]
    fn long_session_compound_savings_simulation() {
        let canonical = "Authentication Module forwards to Policy Engine \
                         for Validation Service against Session Store.";
        let table = seeded(&[canonical, canonical]);
        let rewritten = table.rewrite(canonical);
        assert!(
            rewritten.len() < canonical.len() * 3 / 4,
            "expected >25% length reduction, got {} -> {}",
            canonical.len(),
            rewritten.len()
        );
    }
}