1use ainl_memory::{
7 AinlMemoryNode, AinlNodeType, EpisodicNode, GraphStore, SemanticNode, SqliteGraphStore,
8};
9use ainl_persona::{signals::episodic_should_process, MemoryNodeType, PersonaAxis, RawSignal};
10use ainl_semantic_tagger::{
11 extract_correction_behavior, infer_brevity_preference, infer_formality, tag_tool_names,
12 SemanticTag, TagNamespace,
13};
14use serde_json::Value;
15use std::collections::{HashMap, HashSet};
16use uuid::Uuid;
17
/// Mutable bookkeeping carried across [`extract_pass`] calls.
///
/// Holds the counters and run trackers used to debounce brevity signals, detect formality
/// runs, and rate-limit per-cluster domain signals.
#[derive(Debug, Default, Clone)]
pub struct PersonaSignalExtractorState {
    /// Number of extraction passes started so far; bumped at the top of `extract_pass`.
    pub pass_seq: u64,
    /// Running count of episodes visited by `extract_pass`; the value read before the
    /// increment is used as that episode's "turn".
    pub global_turn_index: u32,
    /// Consecutive episodes that matched `implicit_brevity_shape` since the last reset.
    implicit_brevity_streak: u8,
    /// Turn at which a brevity signal (explicit or implicit) was last emitted, if any.
    last_brevity_emit_turn: Option<u32>,
    /// Current formality direction and the length of the run of user messages that inferred it.
    formality_run: Option<(FormalityDir, u8)>,
    /// Per topic-cluster key, the `pass_seq` at which a domain signal was last emitted.
    domain_cluster_last_emit_pass: HashMap<String, u64>,
}
33
34impl PersonaSignalExtractorState {
35 pub fn new() -> Self {
36 Self::default()
37 }
38}
39
/// Direction of a formality run tracked in `PersonaSignalExtractorState::formality_run`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FormalityDir {
    /// The formality tag value was `"informal"`.
    Informal,
    /// The formality tag value was `"formal"`.
    Formal,
}
45
/// Minimum number of turns that must separate two brevity emissions.
const BREVITY_DEBOUNCE_TURNS: u32 = 3;
/// Minimum number of passes that must elapse before a cluster may emit a domain signal again.
const DOMAIN_COOLDOWN_PASSES: u64 = 2;
/// `recurrence_count` at or above which a semantic node counts as "strong" for its cluster.
const DOMAIN_MIN_RECURRENCE_NODE: u32 = 3;
/// Number of strong nodes a cluster needs in order to cross the emission threshold.
const DOMAIN_EMIT_AT_LEAST_NODES: usize = 2;
/// Alternative threshold: a single node with at least this `recurrence_count` also qualifies.
const DOMAIN_SINGLE_NODE_RECURRENCE: u32 = 6;
51
52fn trace_obj(ep: &EpisodicNode) -> Option<&serde_json::Map<String, Value>> {
53 ep.trace_event.as_ref()?.as_object()
54}
55
56fn user_text(ep: &EpisodicNode) -> String {
57 if let Some(s) = &ep.user_message {
58 return s.clone();
59 }
60 trace_obj(ep)
61 .and_then(|m| m.get("user_message"))
62 .and_then(|v| v.as_str())
63 .unwrap_or("")
64 .to_string()
65}
66
67fn assistant_tokens(ep: &EpisodicNode) -> u32 {
68 if ep.assistant_response_tokens > 0 {
69 return ep.assistant_response_tokens;
70 }
71 trace_obj(ep)
72 .and_then(|m| m.get("assistant_response_tokens"))
73 .and_then(|v| v.as_u64().or_else(|| v.as_f64().map(|f| f as u64)))
74 .map(|u| u as u32)
75 .unwrap_or(0)
76}
77
78fn user_tokens(ep: &EpisodicNode) -> u32 {
79 if ep.user_message_tokens > 0 {
80 return ep.user_message_tokens;
81 }
82 let t = user_text(ep);
83 if t.is_empty() {
84 0
85 } else {
86 t.split_whitespace().count() as u32
87 }
88}
89
90fn implicit_brevity_shape(ep: &EpisodicNode) -> bool {
91 let ut = user_tokens(ep);
92 let atok = assistant_tokens(ep);
93 ut < 12 && atok > 300
94}
95
96fn formality_direction_from_tag(user: &str) -> Option<FormalityDir> {
97 infer_formality(user).and_then(|tag| match tag.value.as_str() {
98 "informal" => Some(FormalityDir::Informal),
99 "formal" => Some(FormalityDir::Formal),
100 _ => None,
101 })
102}
103
104fn brevity_debounce_allows(state: &PersonaSignalExtractorState, turn: u32) -> bool {
105 match state.last_brevity_emit_turn {
106 None => true,
107 Some(prev) if turn.saturating_sub(prev) >= BREVITY_DEBOUNCE_TURNS => true,
108 _ => false,
109 }
110}
111
112fn append_episode_tags(
113 store: &SqliteGraphStore,
114 node_id: Uuid,
115 tags: &[String],
116) -> Result<(), String> {
117 if tags.is_empty() {
118 return Ok(());
119 }
120 let Some(mut node) = store.read_node(node_id)? else {
121 return Ok(());
122 };
123 let AinlNodeType::Episode { ref mut episodic } = node.node_type else {
124 return Ok(());
125 };
126 let existing: HashSet<&str> = episodic
127 .persona_signals_emitted
128 .iter()
129 .map(|s| s.as_str())
130 .collect();
131 let mut seen_new: HashSet<String> = HashSet::new();
132 let mut new_tags: Vec<String> = Vec::new();
133 for t in tags.iter().filter(|t| !existing.contains(t.as_str())) {
134 if seen_new.insert(t.clone()) {
135 new_tags.push(t.clone());
136 }
137 }
138 if new_tags.is_empty() {
139 return Ok(());
140 }
141 episodic.persona_signals_emitted.extend(new_tags);
142 store.write_node(&node)
143}
144
145fn tool_affinity_signals(episode_id: Uuid, ep: &EpisodicNode) -> Vec<RawSignal> {
146 let tools: Vec<String> = ep.effective_tools().to_vec();
147 let tagged = tag_tool_names(&tools);
148 let mut out = Vec::new();
149 for _ in tagged {
150 out.push(RawSignal {
151 axis: PersonaAxis::Instrumentality,
152 reward: 0.68,
153 weight: 0.5,
154 source_node_id: episode_id,
155 source_node_type: MemoryNodeType::Episodic,
156 });
157 }
158 out
159}
160
/// Normalizes a topic-cluster label into a grouping key.
///
/// The label is trimmed and ASCII-lowercased; a missing or whitespace-only label yields `None`.
fn cluster_key(topic: Option<&String>) -> Option<String> {
    topic
        .map(|label| label.trim())
        .filter(|label| !label.is_empty())
        .map(|label| label.to_ascii_lowercase())
}
168
169fn domain_emergence_signals(
170 store: &SqliteGraphStore,
171 agent_id: &str,
172 state: &mut PersonaSignalExtractorState,
173) -> Result<Vec<RawSignal>, String> {
174 let mut by_cluster: HashMap<String, Vec<SemanticNode>> = HashMap::new();
175 for node in store.find_by_type("semantic")? {
176 if node.agent_id != agent_id {
177 continue;
178 }
179 let AinlNodeType::Semantic { semantic } = node.node_type else {
180 continue;
181 };
182 let Some(key) = cluster_key(semantic.topic_cluster.as_ref()) else {
183 continue;
184 };
185 by_cluster.entry(key).or_default().push(semantic);
186 }
187
188 let mut out = Vec::new();
189 for (cluster, nodes) in by_cluster {
190 let strong_nodes = nodes
191 .iter()
192 .filter(|n| n.recurrence_count >= DOMAIN_MIN_RECURRENCE_NODE)
193 .count();
194 let max_rec = nodes.iter().map(|n| n.recurrence_count).max().unwrap_or(0);
195 let crosses =
196 strong_nodes >= DOMAIN_EMIT_AT_LEAST_NODES || max_rec >= DOMAIN_SINGLE_NODE_RECURRENCE;
197 if !crosses {
198 continue;
199 }
200 if let Some(last_pass) = state.domain_cluster_last_emit_pass.get(&cluster).copied() {
201 if state.pass_seq.saturating_sub(last_pass) < DOMAIN_COOLDOWN_PASSES {
202 continue;
203 }
204 }
205 let Some(anchor) = nodes.first() else {
206 continue;
207 };
208 state
209 .domain_cluster_last_emit_pass
210 .insert(cluster.clone(), state.pass_seq);
211 out.push(RawSignal {
212 axis: PersonaAxis::Persistence,
213 reward: 0.72,
214 weight: 0.6,
215 source_node_id: anchor.source_turn_id,
216 source_node_type: MemoryNodeType::Semantic,
217 });
218 }
219 Ok(out)
220}
221
222fn correction_emit_tag(tag: &SemanticTag) -> String {
223 match tag.namespace {
224 TagNamespace::Behavior => format!("det:behavior:{}", tag.value),
225 TagNamespace::Correction => format!("det:correction:{}", tag.value),
226 _ => format!("det:{}", tag.to_canonical_string().replace(':', "_")),
227 }
228}
229
230pub fn extract_pass(
232 store: &SqliteGraphStore,
233 agent_id: &str,
234 state: &mut PersonaSignalExtractorState,
235) -> Result<Vec<RawSignal>, String> {
236 state.pass_seq = state.pass_seq.saturating_add(1);
237
238 let mut episodes: Vec<AinlMemoryNode> = store
239 .find_by_type("episode")?
240 .into_iter()
241 .filter(|n| n.agent_id == agent_id)
242 .collect();
243 episodes.sort_by_key(|n| match &n.node_type {
244 AinlNodeType::Episode { episodic } => episodic.timestamp,
245 _ => 0,
246 });
247
248 let mut out = Vec::new();
249
250 for ep_node in &episodes {
251 let episode_id = ep_node.id;
252 let AinlNodeType::Episode { episodic } = &ep_node.node_type else {
253 continue;
254 };
255 let turn = state.global_turn_index;
256 state.global_turn_index = state.global_turn_index.saturating_add(1);
257
258 let mut tags: Vec<String> = Vec::new();
259
260 if !episodic_should_process(episodic) {
263 out.extend(tool_affinity_signals(episode_id, episodic));
264 }
265
266 let user = user_text(episodic);
267
268 if let Some(tag) = extract_correction_behavior(&user) {
269 out.push(RawSignal {
270 axis: PersonaAxis::Systematicity,
271 reward: 0.84,
272 weight: 0.85,
273 source_node_id: episode_id,
274 source_node_type: MemoryNodeType::Episodic,
275 });
276 tags.push(correction_emit_tag(&tag));
277 }
278
279 if !user.is_empty()
280 && infer_brevity_preference(&user).is_some()
281 && brevity_debounce_allows(state, turn)
282 {
283 out.push(RawSignal {
284 axis: PersonaAxis::Verbosity,
285 reward: 0.22,
286 weight: 0.75,
287 source_node_id: episode_id,
288 source_node_type: MemoryNodeType::Episodic,
289 });
290 tags.push("det:brevity:explicit".into());
291 state.last_brevity_emit_turn = Some(turn);
292 state.implicit_brevity_streak = 0;
293 } else if implicit_brevity_shape(episodic) {
294 state.implicit_brevity_streak = state.implicit_brevity_streak.saturating_add(1);
295 if state.implicit_brevity_streak >= 2 && brevity_debounce_allows(state, turn) {
296 out.push(RawSignal {
297 axis: PersonaAxis::Verbosity,
298 reward: 0.24,
299 weight: 0.7,
300 source_node_id: episode_id,
301 source_node_type: MemoryNodeType::Episodic,
302 });
303 tags.push("det:brevity:implicit_shape".into());
304 state.last_brevity_emit_turn = Some(turn);
305 state.implicit_brevity_streak = 0;
306 }
307 } else {
308 state.implicit_brevity_streak = 0;
309 }
310
311 if !user.is_empty() {
312 match formality_direction_from_tag(&user) {
313 Some(dir) => {
314 let bump = match &mut state.formality_run {
315 Some((cur, n)) if *cur == dir => {
316 *n = n.saturating_add(1);
317 *n
318 }
319 _ => {
320 state.formality_run = Some((dir, 1));
321 1
322 }
323 };
324 if bump >= 3 {
325 let (reward, tag) = match dir {
326 FormalityDir::Formal => (0.78_f32, "det:formality:formal_run"),
327 FormalityDir::Informal => (0.28_f32, "det:formality:informal_run"),
328 };
329 out.push(RawSignal {
330 axis: PersonaAxis::Systematicity,
331 reward,
332 weight: 0.65,
333 source_node_id: episode_id,
334 source_node_type: MemoryNodeType::Episodic,
335 });
336 tags.push(tag.into());
337 state.formality_run = None;
338 }
339 }
340 None => {
341 state.formality_run = None;
342 }
343 }
344 }
345
346 append_episode_tags(store, episode_id, &tags)?;
347 }
348
349 out.extend(domain_emergence_signals(store, agent_id, state)?);
350 Ok(out)
351}
352
/// Unit tests for the persona signal extractor: brevity, tool affinity, tag persistence,
/// formality runs, domain emergence, and the correction tagger's expected outputs.
#[cfg(test)]
mod tests {
    use super::*;
    use ainl_memory::{AinlMemoryNode, AinlNodeType, SqliteGraphStore};
    use ainl_semantic_tagger::{
        extract_correction_behavior, infer_brevity_preference, infer_formality, TagNamespace,
    };
    use uuid::Uuid;

    /// Builds a bare episode with the given token counts and every other field empty.
    fn ep_with_tokens(user_t: u32, asst_t: u32) -> EpisodicNode {
        let tid = Uuid::new_v4();
        EpisodicNode {
            turn_id: tid,
            timestamp: 0,
            tool_calls: vec![],
            delegation_to: None,
            trace_event: None,
            turn_index: 0,
            user_message_tokens: user_t,
            assistant_response_tokens: asst_t,
            tools_invoked: vec![],
            persona_signals_emitted: vec![],
            sentiment: None,
            flagged: false,
            conversation_id: String::new(),
            follows_episode_id: None,
            user_message: None,
            assistant_response: None,
        }
    }

    /// An explicit "be more concise" request passes the explicit-brevity gate on a fresh state.
    #[test]
    fn brevity_explicit_keyword_emits() {
        let mut st = PersonaSignalExtractorState::default();
        let tid = Uuid::new_v4();
        let mut ep = ep_with_tokens(0, 0);
        ep.user_message = Some("Please be more concise here.".into());
        let mut out: Vec<RawSignal> = Vec::new();
        let mut tags: Vec<String> = Vec::new();
        let turn = 0;
        let user = user_text(&ep);
        if !user.is_empty()
            && infer_brevity_preference(&user).is_some()
            && brevity_debounce_allows(&st, turn)
        {
            out.push(RawSignal {
                axis: PersonaAxis::Verbosity,
                reward: 0.22,
                weight: 0.75,
                source_node_id: tid,
                source_node_type: MemoryNodeType::Episodic,
            });
            tags.push("det:brevity:explicit".into());
            st.last_brevity_emit_turn = Some(turn);
        }
        assert_eq!(out.len(), 1);
        assert_eq!(tags.len(), 1);
    }

    /// One qualifying turn stays below the implicit threshold; the second one reaches it.
    #[test]
    fn brevity_implicit_single_no_emit_double_emits() {
        let mut st = PersonaSignalExtractorState::default();
        let ep = ep_with_tokens(5, 400);
        assert!(implicit_brevity_shape(&ep));
        st.implicit_brevity_streak = st.implicit_brevity_streak.saturating_add(1);
        assert_eq!(st.implicit_brevity_streak, 1);
        assert!(st.implicit_brevity_streak < 2);

        // Second consecutive qualifying turn: the streak reaches the emit threshold and,
        // with no previous emission recorded, the debounce lets it through.
        let ep2 = ep_with_tokens(4, 350);
        assert!(implicit_brevity_shape(&ep2));
        st.implicit_brevity_streak = st.implicit_brevity_streak.saturating_add(1);
        assert_eq!(st.implicit_brevity_streak, 2);
        assert!(brevity_debounce_allows(&st, 1));
    }

    /// Two stored short-prompt/long-reply episodes yield a Verbosity signal from a full pass.
    #[test]
    fn brevity_implicit_two_consecutive_emits_via_pass() {
        let dir = tempfile::tempdir().expect("d");
        let store = SqliteGraphStore::open(&dir.path().join("br.db")).expect("open");
        let agent = "agent-br";
        let mut st = PersonaSignalExtractorState::default();
        for (ts, ut, at) in [(1_i64, 5_u32, 400_u32), (2_i64, 4_u32, 350_u32)] {
            let tid = Uuid::new_v4();
            let mut n = AinlMemoryNode::new_episode(tid, ts, vec![], None, None);
            n.agent_id = agent.into();
            if let AinlNodeType::Episode { episodic } = &mut n.node_type {
                episodic.user_message_tokens = ut;
                episodic.assistant_response_tokens = at;
            }
            store.write_node(&n).expect("w");
        }
        let sigs = extract_pass(&store, agent, &mut st).expect("extract");
        let brevity = sigs
            .iter()
            .filter(|s| s.axis == PersonaAxis::Verbosity)
            .count();
        assert!(
            brevity >= 1,
            "expected implicit brevity after two qualifying turns"
        );
    }

    /// After an emission at turn 0, turns 1 and 2 are blocked and turn 3 is allowed.
    #[test]
    fn brevity_debounce_blocks() {
        let st = PersonaSignalExtractorState {
            last_brevity_emit_turn: Some(0),
            ..Default::default()
        };
        assert!(!brevity_debounce_allows(&st, 1));
        assert!(!brevity_debounce_allows(&st, 2));
        assert!(brevity_debounce_allows(&st, 3));
    }

    /// Each invoked tool produces exactly one Instrumentality signal.
    #[test]
    fn tool_invocations_emit_one_each() {
        let tid = Uuid::new_v4();
        let mut ep = ep_with_tokens(0, 0);
        ep.tools_invoked = vec!["file_read".into(), "shell_exec".into()];
        let sigs = tool_affinity_signals(tid, &ep);
        assert_eq!(sigs.len(), 2);
        assert!(sigs.iter().all(|s| s.axis == PersonaAxis::Instrumentality));
    }

    /// Duplicate tags are dropped both within one call and against tags already stored.
    #[test]
    fn append_episode_tags_dedupes_existing_and_within_batch() {
        let dir = tempfile::tempdir().expect("d");
        let store = SqliteGraphStore::open(&dir.path().join("ep_tags.db")).expect("open");
        let tid = Uuid::new_v4();
        let mut n = AinlMemoryNode::new_episode(tid, 1, vec![], None, None);
        n.agent_id = "a".into();
        store.write_node(&n).expect("w");
        append_episode_tags(
            &store,
            n.id,
            &["det:brevity:explicit".into(), "det:brevity:explicit".into()],
        )
        .expect("append");
        let r = store.read_node(n.id).expect("r").expect("node");
        let AinlNodeType::Episode { episodic } = r.node_type else {
            panic!();
        };
        assert_eq!(
            episodic.persona_signals_emitted,
            vec!["det:brevity:explicit".to_string()]
        );
        append_episode_tags(&store, n.id, &["det:brevity:explicit".into()]).expect("append2");
        let r2 = store.read_node(n.id).expect("r2").expect("node");
        let AinlNodeType::Episode { episodic: e2 } = r2.node_type else {
            panic!();
        };
        assert_eq!(e2.persona_signals_emitted.len(), 1);
    }

    /// The tagger classifies a clearly casual message as informal.
    #[test]
    fn formality_single_informal_no_emit_until_three() {
        let t = infer_formality("yo gonna grab some food lol yeah").expect("tag");
        assert_eq!(t.value, "informal");
    }

    /// Three informal messages in a row drive the run counter to the emit threshold.
    #[test]
    fn formality_three_informal_emits_logic() {
        let mut run: Option<(FormalityDir, u8)> = None;
        let informal_line = "yeah gonna wanna grab some cool stuff lol";
        let mut emitted = false;
        for _ in 0..3 {
            let dir = formality_direction_from_tag(informal_line).expect("dir");
            assert_eq!(dir, FormalityDir::Informal);
            let bump = match &mut run {
                Some((FormalityDir::Informal, n)) => {
                    *n += 1;
                    *n
                }
                _ => {
                    run = Some((FormalityDir::Informal, 1));
                    1
                }
            };
            if bump >= 3 {
                emitted = true;
            }
        }
        assert!(emitted);
    }

    /// Alternating formality directions never build a run of three.
    #[test]
    fn formality_mixed_resets() {
        let mut run: Option<(FormalityDir, u8)> = None;
        let msgs = [
            "gonna grab food",
            "Therefore, the coefficient matrix exhibits stability.",
            "ok lol",
        ];
        let mut max_run = 0u8;
        for m in msgs {
            match formality_direction_from_tag(m) {
                Some(dir) => {
                    let bump = match &mut run {
                        Some((cur, n)) if *cur == dir => {
                            *n += 1;
                            *n
                        }
                        _ => {
                            run = Some((dir, 1));
                            1
                        }
                    };
                    max_run = max_run.max(bump);
                }
                None => run = None,
            }
        }
        assert!(max_run < 3);
    }

    /// Domain emergence is gated on `recurrence_count`; a high `reference_count` is ignored.
    #[test]
    fn domain_recurrence_not_reference() {
        let (_d, store) = {
            let dir = tempfile::tempdir().expect("d");
            let p = dir.path().join("t.db");
            let s = SqliteGraphStore::open(&p).expect("open");
            (dir, s)
        };
        let tid = Uuid::new_v4();
        let mut s1 = AinlMemoryNode::new_fact("a".into(), 0.8, tid);
        s1.agent_id = "ag".into();
        if let AinlNodeType::Semantic { semantic } = &mut s1.node_type {
            semantic.topic_cluster = Some("rust".into());
            semantic.recurrence_count = 1;
            semantic.reference_count = 99;
        }
        store.write_node(&s1).expect("w");
        let mut s2 = AinlMemoryNode::new_fact("b".into(), 0.8, tid);
        s2.agent_id = "ag".into();
        if let AinlNodeType::Semantic { semantic } = &mut s2.node_type {
            semantic.topic_cluster = Some("rust".into());
            semantic.recurrence_count = 1;
            semantic.reference_count = 99;
        }
        store.write_node(&s2).expect("w");
        let mut st = PersonaSignalExtractorState {
            pass_seq: 1,
            ..Default::default()
        };
        let sigs = domain_emergence_signals(&store, "ag", &mut st).expect("d");
        assert!(sigs.is_empty(), "high reference_count must not gate domain");
    }

    /// Two nodes at the per-node recurrence minimum make their cluster emit once.
    #[test]
    fn domain_threshold_crosses() {
        let dir = tempfile::tempdir().expect("d");
        let store = SqliteGraphStore::open(&dir.path().join("d.db")).expect("open");
        let tid = Uuid::new_v4();
        for fact in ["a", "b"] {
            let mut s = AinlMemoryNode::new_fact(fact.into(), 0.8, tid);
            s.agent_id = "ag".into();
            if let AinlNodeType::Semantic { semantic } = &mut s.node_type {
                semantic.topic_cluster = Some("rust".into());
                semantic.recurrence_count = 3;
            }
            store.write_node(&s).expect("w");
        }
        let mut st = PersonaSignalExtractorState {
            pass_seq: 1,
            ..Default::default()
        };
        let sigs = domain_emergence_signals(&store, "ag", &mut st).expect("d");
        assert_eq!(sigs.len(), 1);
    }

    /// A cluster that emitted on pass 1 is suppressed on pass 2 by the cooldown.
    #[test]
    fn domain_cooldown_second_pass_suppressed() {
        let dir = tempfile::tempdir().expect("d");
        let store = SqliteGraphStore::open(&dir.path().join("d2.db")).expect("open");
        let tid = Uuid::new_v4();
        for fact in ["a", "b"] {
            let mut s = AinlMemoryNode::new_fact(fact.into(), 0.8, tid);
            s.agent_id = "ag".into();
            if let AinlNodeType::Semantic { semantic } = &mut s.node_type {
                semantic.topic_cluster = Some("go".into());
                semantic.recurrence_count = 3;
            }
            store.write_node(&s).expect("w");
        }
        let mut st = PersonaSignalExtractorState {
            pass_seq: 1,
            ..Default::default()
        };
        let n1 = domain_emergence_signals(&store, "ag", &mut st)
            .expect("d")
            .len();
        st.pass_seq = 2;
        let n2 = domain_emergence_signals(&store, "ag", &mut st)
            .expect("d")
            .len();
        assert_eq!(n1, 1);
        assert_eq!(n2, 0);
    }

    /// "don't use X" is tagged as a correction.
    #[test]
    fn correction_dont_use_bullets() {
        let t = extract_correction_behavior("don't use bullet points").expect("tag");
        assert_eq!(t.namespace, TagNamespace::Correction);
        assert_eq!(t.value, "avoid_bullets");
    }

    /// "you keep X" is tagged as a behavior.
    #[test]
    fn correction_you_keep_caveats() {
        let t = extract_correction_behavior("you keep adding caveats").expect("tag");
        assert_eq!(t.namespace, TagNamespace::Behavior);
        assert_eq!(t.value, "adding_caveats");
    }

    /// "I told you not to X" is tagged as a correction.
    #[test]
    fn correction_told_emojis() {
        let t = extract_correction_behavior("I told you not to use emojis").expect("tag");
        assert_eq!(t.namespace, TagNamespace::Correction);
        assert_eq!(t.value, "avoid_emojis");
    }

    /// A bare "stop" carries no behavior to tag.
    #[test]
    fn correction_stop_alone() {
        assert!(extract_correction_behavior("stop").is_none());
    }

    /// "I said so" is not a correction.
    #[test]
    fn correction_i_said_so() {
        assert!(extract_correction_behavior("I said so").is_none());
    }

    /// "don't do that" names no concrete behavior, so nothing is tagged.
    #[test]
    fn correction_dont_do_that_no_behavior() {
        assert!(extract_correction_behavior("don't do that").is_none());
    }
}