Skip to main content

roboticus_agent/
task_state.rs

1//! Task Operating State — introspection-driven state synthesis.
2//!
3//! This module defines the types and synthesis rules for building a
4//! [`TaskOperatingState`] from raw subsystem outputs ([`TaskStateInput`]).
5//!
6//! ## Design Invariants
7//!
8//! - `roboticus-agent` owns all synthesis rules. `roboticus-api` supplies inputs only.
9//! - No duplicated truth between crates — if both compute "fit," that's a bug.
10//! - Memory has two roles:
11//!   - **Conversation**: ambient enrichment (passive, automatic)
12//!   - **Task**: operational input (active, introspective)
13//!   - `InspectMemory` applies to task turns only. Conversational turns get
14//!     passive memory via the existing context builder.
15
16use serde::Serialize;
17
18use crate::retrieval::RetrievalMetrics;
19use crate::tool_search::ToolSearchStats;
20
21// ── Input: raw facts from subsystems ─────────────────────────────────
22
/// Raw facts assembled by `roboticus-api` from pipeline subsystems.
/// This is the sole input to [`synthesize`] — the API layer collects
/// these values but does NOT interpret them.
///
/// Not every field is read by [`synthesize`] in this module: `authority`,
/// `decomposition_proposal`, `named_tool_match`, `declared_action`,
/// `previous_turn_had_protocol_issues` and `normalization_retry_streak` are
/// carried on the input but not consulted by the synthesis helpers here.
/// NOTE(review): presumably those are consumed by the planner — confirm.
#[derive(Debug, Clone)]
pub struct TaskStateInput {
    /// The user's message text. Scanned (ASCII-lowercased) for declared
    /// actions and for echoes of `self_echo_fragments`.
    pub user_content: String,
    /// Serialized intent names (from IntentRegistry). Compared
    /// ASCII-case-insensitively against [`TASK_INTENTS`] during classification.
    pub intents: Vec<String>,
    /// Serialized InputAuthority variant name.
    pub authority: String,

    // ── From retrieval subsystem ──
    /// Retrieval metrics for this turn; `None` yields an all-zero
    /// [`MemoryConfidence`] with no recall gap.
    pub retrieval_metrics: Option<RetrievalMetrics>,

    // ── From tool search subsystem ──
    /// Tool search statistics for this turn; `None` yields a [`ToolFit`]
    /// with zero counts.
    pub tool_search_stats: Option<ToolSearchStats>,
    /// Whether any MCP-sourced tools are available.
    pub mcp_tools_available: bool,

    // ── From roster query ──
    /// Enabled, non-proxy subagents.
    pub taskable_agent_count: usize,
    /// Agents whose skills overlap with the task.
    pub fit_agent_count: usize,
    /// Names of fitting agents.
    pub fit_agent_names: Vec<String>,

    // ── From skill registry ──
    /// Total enabled skills.
    pub enabled_skill_count: usize,
    /// Skills whose triggers match the user input.
    pub matching_skill_count: usize,
    /// Skills that would be useful but are not registered.
    pub missing_skills: Vec<String>,

    // ── From runtime state ──
    /// Tokens remaining after system prompt.
    pub remaining_budget_tokens: usize,
    /// Whether any LLM provider circuit breaker is open.
    pub provider_breaker_open: bool,
    /// "standard" or "streaming".
    pub inference_mode: String,

    // ── From decomposition gate (scored input, NOT authority) ──
    /// Advisory output of the decomposition gate, if it ran.
    pub decomposition_proposal: Option<DecompositionProposal>,
    /// Whether the user explicitly requested specialist/delegation workflow.
    /// When true the turn is always classified as a task.
    pub explicit_specialist_workflow: bool,
    /// Whether the user's message references an existing registered tool by name.
    /// When true, the named capability already exists in the tool registry and will
    /// be available during inference — the planner should NOT propose specialist
    /// creation for it.
    pub named_tool_match: bool,

    // ── Behavioral history (from recent turn memory) ──
    /// Structural fingerprints of the last N assistant responses, oldest first
    /// (the last element is treated as the most recent).
    /// Each string is a compact skeleton like "narrative+options+question",
    /// as produced by [`response_skeleton`].
    pub recent_response_skeletons: Vec<String>,
    /// Word counts of the last N user messages (engagement signal), oldest first.
    pub recent_user_message_lengths: Vec<usize>,
    /// Notable phrases extracted from recent assistant turns
    /// (see [`extract_echo_fragments`]).
    /// Used to detect self-echoing — the agent reusing its own prior phrasing.
    pub self_echo_fragments: Vec<String>,
    /// A declared physical action supplied by the caller, if any
    /// (verb + target pattern, e.g. "I attack the goblin").
    /// The declared-action assessment in this module re-derives the action
    /// from `user_content` and does not read this field.
    pub declared_action: Option<DeclaredAction>,
    /// Whether the previous assistant turn contained protocol normalization
    /// issues (malformed tool calls, narrated next steps, unexecuted streaming
    /// markers). When true, the planner may select NormalizationRetry.
    pub previous_turn_had_protocol_issues: bool,
    /// Number of consecutive turns with protocol normalization issues.
    /// Zero when `previous_turn_had_protocol_issues` is false; allows the
    /// planner to escalate retry strategy for persistent normalization failures.
    pub normalization_retry_streak: u8,
}
97
/// A user-declared action that must be resolved, not redirected.
///
/// When produced by `assess_declared_action`, both fields are derived from
/// the ASCII-lowercased user message.
#[derive(Debug, Clone, Serialize)]
pub struct DeclaredAction {
    /// The action verb ("attack", "stab", "grab", "throw", etc.).
    /// Always a member of [`ACTION_VERBS`] when produced by this module.
    pub verb: String,
    /// The target of the action ("the goblin", "the door", "the gem"):
    /// at most six whitespace-separated words following the verb.
    pub target: String,
}
106
/// Decomposition gate output, used as one scored signal by the planner.
///
/// This module only carries the proposal on [`TaskStateInput`]; none of the
/// synthesis helpers here read its fields.
#[derive(Debug, Clone, Serialize)]
pub struct DecompositionProposal {
    /// The gate's recommendation on whether to delegate (advisory only).
    pub should_delegate: bool,
    /// Human-readable explanation for the recommendation.
    pub rationale: String,
    /// Score margin behind the recommendation.
    /// NOTE(review): sign and scale are not visible in this module — confirm
    /// against the decomposition gate.
    pub utility_margin: f64,
}
114
115// ── Operating State: synthesized assessment ──────────────────────────
116
/// Classification of the turn as task-oriented or conversational.
///
/// A turn is a `Task` when the user explicitly requested a specialist
/// workflow or when any supplied intent name matches [`TASK_INTENTS`];
/// otherwise it is a `Conversation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum TaskClassification {
    /// Casual exchange — memory is ambient enrichment.
    Conversation,
    /// Goal-directed work — memory is operational input.
    Task,
}
125
/// Memory retrieval confidence assessment.
///
/// When no retrieval metrics were supplied, every numeric field is zero,
/// `recall_gap` is false and `empty_tiers` is empty.
#[derive(Debug, Clone, Serialize)]
pub struct MemoryConfidence {
    /// Average cosine similarity across retrieved memories
    /// (copied from the retrieval metrics).
    pub avg_similarity: f64,
    /// Fraction of context budget consumed by memory tokens
    /// (copied from the retrieval metrics).
    pub budget_utilization: f64,
    /// Total memories retrieved.
    pub retrieval_count: usize,
    /// Whether any tier returned zero results despite budget availability:
    /// true when `empty_tiers` is non-empty and `budget_utilization` is below 0.8.
    pub recall_gap: bool,
    /// Names of tiers that returned no results. Possible values, in this
    /// order: "working", "episodic", "semantic", "procedural", "relationship".
    pub empty_tiers: Vec<String>,
}
140
/// Runtime and storage constraints affecting action selection.
#[derive(Debug, Clone, Serialize)]
pub struct RuntimeConstraints {
    /// Tokens remaining after system prompt.
    pub remaining_budget_tokens: usize,
    /// Whether the token budget is under pressure. As computed by
    /// `assess_runtime`, this is true when fewer than 2000 tokens remain —
    /// an absolute floor, not a percentage of the total budget.
    pub budget_pressured: bool,
    /// Whether any LLM provider circuit breaker is open.
    pub provider_breaker_open: bool,
    /// Current inference mode ("standard" or "streaming"), copied from the input.
    pub inference_mode: String,
}
153
/// Tool availability and relevance assessment.
///
/// All counts are zero when no tool search statistics were supplied.
#[derive(Debug, Clone, Serialize)]
pub struct ToolFit {
    /// Total tools available after search/pruning
    /// (the search's `candidates_selected`).
    pub available_count: usize,
    /// Estimated number of highly relevant tools. As computed by
    /// `assess_tools` this is `min(candidates_selected, candidates_considered / 3)`
    /// — a heuristic, not a count of tools above a similarity threshold.
    pub high_relevance_count: usize,
    /// Tokens saved by pruning low-relevance tools.
    pub token_savings: usize,
    /// Whether MCP-sourced tools are available.
    pub mcp_available: bool,
}
166
/// Subagent roster fit assessment.
///
/// Every field is copied unchanged from the corresponding
/// [`TaskStateInput`] field; no additional scoring happens here.
#[derive(Debug, Clone, Serialize)]
pub struct RosterFit {
    /// Total enabled, non-proxy agents.
    pub taskable_count: usize,
    /// Agents with capability overlap for this task.
    pub fit_count: usize,
    /// Names of fitting agents.
    pub fit_names: Vec<String>,
    /// Whether the user explicitly requested specialist/delegation workflow.
    pub explicit_workflow: bool,
}
179
/// Skill fit assessment.
///
/// Every field is copied unchanged from the corresponding
/// [`TaskStateInput`] field.
#[derive(Debug, Clone, Serialize)]
pub struct SkillFit {
    /// Total enabled skills.
    pub enabled_count: usize,
    /// Skills whose triggers match the user input.
    pub matching_count: usize,
    /// Skills that would be useful but are not registered.
    pub missing_skills: Vec<String>,
}
190
/// Behavioral self-assessment based on recent output history.
#[derive(Debug, Clone, Serialize)]
pub struct BehavioralHistory {
    /// Whether the agent has been producing structurally identical responses
    /// (the three or more most recent skeletons are identical).
    pub structural_repetition: bool,
    /// Number of consecutive responses with the same skeleton, counted back
    /// from the most recent one. Only values of 3 or more signal repetition;
    /// treat smaller values as "no repetition".
    pub repetition_streak: usize,
    /// The repeated skeleton pattern (if any). Set only when
    /// `structural_repetition` is true.
    pub repeated_pattern: Option<String>,
    /// Whether user engagement is declining (messages getting shorter/more directive):
    /// the last three user message lengths are strictly decreasing and the
    /// most recent is under 30 words.
    pub engagement_declining: bool,
    /// Proportion of recent assistant fragments present in the current user message
    /// (0.0 = none, 1.0 = all fragments matched). A value above 0.3 suggests the
    /// user is echoing back the agent's own language — possible confusion signal.
    /// Matching is an ASCII-case-insensitive substring test. Note that
    /// `variation_hint` is emitted for any non-zero value, not only above 0.3.
    pub self_echo_risk: f32,
    /// The first matched echo fragment, if any.
    pub echo_fragment: Option<String>,
    /// A context hint to inject when structural repetition or echo is detected.
    /// At most one hint is produced, in priority order: structural repetition,
    /// then self-echo, then declining engagement.
    pub variation_hint: Option<String>,
}
211
/// Assessment of whether the user declared a specific action.
///
/// `action_declared` and `action` always agree: `action` is `Some` exactly
/// when `action_declared` is true.
#[derive(Debug, Clone, Serialize)]
pub struct DeclaredActionState {
    /// Whether the user declared a physical/concrete action
    /// (a verb from [`ACTION_VERBS`] together with a target).
    pub action_declared: bool,
    /// The declared action details (if any).
    pub action: Option<DeclaredAction>,
    /// Whether the action has significant consequences that should be surfaced.
    /// Decided purely by the verb (a fixed list of combat/destructive verbs);
    /// the target is not inspected.
    pub high_consequence: bool,
}
222
/// Full introspection-driven operating state for a turn.
///
/// Built by [`synthesize`] from a [`TaskStateInput`]. This struct captures
/// the agent's assessment of its own readiness — memory quality, tool
/// availability, roster fit, runtime constraints, and behavioral history.
#[derive(Debug, Clone, Serialize)]
pub struct TaskOperatingState {
    /// Task vs. conversation classification of the turn.
    pub classification: TaskClassification,
    /// Quality of memory retrieval for the turn.
    pub memory_confidence: MemoryConfidence,
    /// Token budget, provider health and inference mode.
    pub runtime_constraints: RuntimeConstraints,
    /// Tool availability after search/pruning.
    pub tool_fit: ToolFit,
    /// Subagent roster fit.
    pub roster_fit: RosterFit,
    /// Skill registry fit.
    pub skill_fit: SkillFit,
    /// Repetition, engagement and self-echo signals from recent turns.
    pub behavioral_history: BehavioralHistory,
    /// Declared-action assessment derived from the user's message.
    pub declared_action: DeclaredActionState,
}
239
240// ── Synthesis: TaskStateInput -> TaskOperatingState ──────────────────
241
/// Task-like intent names. These intents signal goal-directed work
/// rather than casual conversation.
///
/// Keep every entry lowercase ASCII: `classify_turn` matches incoming intent
/// names against this list ASCII-case-insensitively, so an entry containing
/// uppercase letters may never match.
const TASK_INTENTS: &[&str] = &[
    "execution",
    "delegation",
    "cron",
    "filedistribution",
    "folderscan",
    "randomtooluse",
    "currentevents",
    "walletaddressscan",
    "imagecountscan",
    "markdowncountscan",
    "obsidianinsights",
    "emailtriage",
];
258
259/// Synthesize a [`TaskOperatingState`] from raw subsystem outputs.
260///
261/// This is the **single source of truth** for operating state derivation.
262/// The rules live here in `roboticus-agent`, not in `roboticus-api`.
263pub fn synthesize(input: &TaskStateInput) -> TaskOperatingState {
264    let classification = classify_turn(input);
265    let memory_confidence = assess_memory(input);
266    let runtime_constraints = assess_runtime(input);
267    let tool_fit = assess_tools(input);
268    let roster_fit = assess_roster(input);
269    let skill_fit = assess_skills(input);
270    let behavioral_history = assess_behavioral_history(input);
271    let declared_action = assess_declared_action(input);
272
273    TaskOperatingState {
274        classification,
275        memory_confidence,
276        runtime_constraints,
277        tool_fit,
278        roster_fit,
279        skill_fit,
280        behavioral_history,
281        declared_action,
282    }
283}
284
285fn classify_turn(input: &TaskStateInput) -> TaskClassification {
286    if input.explicit_specialist_workflow {
287        return TaskClassification::Task;
288    }
289    let intents_lower: Vec<String> = input
290        .intents
291        .iter()
292        .map(|i| i.to_ascii_lowercase())
293        .collect();
294    if TASK_INTENTS
295        .iter()
296        .any(|t| intents_lower.iter().any(|i| i == t))
297    {
298        TaskClassification::Task
299    } else {
300        TaskClassification::Conversation
301    }
302}
303
304fn assess_memory(input: &TaskStateInput) -> MemoryConfidence {
305    match &input.retrieval_metrics {
306        Some(metrics) => {
307            let mut empty_tiers = Vec::new();
308            if metrics.tiers.working == 0 {
309                empty_tiers.push("working".into());
310            }
311            if metrics.tiers.episodic == 0 {
312                empty_tiers.push("episodic".into());
313            }
314            if metrics.tiers.semantic == 0 {
315                empty_tiers.push("semantic".into());
316            }
317            if metrics.tiers.procedural == 0 {
318                empty_tiers.push("procedural".into());
319            }
320            if metrics.tiers.relationship == 0 {
321                empty_tiers.push("relationship".into());
322            }
323            let recall_gap = !empty_tiers.is_empty() && metrics.budget_utilization < 0.8;
324            MemoryConfidence {
325                avg_similarity: metrics.avg_similarity,
326                budget_utilization: metrics.budget_utilization,
327                retrieval_count: metrics.retrieval_count,
328                recall_gap,
329                empty_tiers,
330            }
331        }
332        None => MemoryConfidence {
333            avg_similarity: 0.0,
334            budget_utilization: 0.0,
335            retrieval_count: 0,
336            recall_gap: false,
337            empty_tiers: Vec::new(),
338        },
339    }
340}
341
342fn assess_runtime(input: &TaskStateInput) -> RuntimeConstraints {
343    RuntimeConstraints {
344        remaining_budget_tokens: input.remaining_budget_tokens,
345        budget_pressured: input.remaining_budget_tokens < 2000,
346        provider_breaker_open: input.provider_breaker_open,
347        inference_mode: input.inference_mode.clone(),
348    }
349}
350
351fn assess_tools(input: &TaskStateInput) -> ToolFit {
352    match &input.tool_search_stats {
353        Some(stats) => ToolFit {
354            available_count: stats.candidates_selected,
355            high_relevance_count: stats
356                .candidates_selected
357                .min(stats.candidates_considered / 3),
358            token_savings: stats.token_savings,
359            mcp_available: input.mcp_tools_available,
360        },
361        None => ToolFit {
362            available_count: 0,
363            high_relevance_count: 0,
364            token_savings: 0,
365            mcp_available: input.mcp_tools_available,
366        },
367    }
368}
369
370fn assess_roster(input: &TaskStateInput) -> RosterFit {
371    RosterFit {
372        taskable_count: input.taskable_agent_count,
373        fit_count: input.fit_agent_count,
374        fit_names: input.fit_agent_names.clone(),
375        explicit_workflow: input.explicit_specialist_workflow,
376    }
377}
378
379fn assess_skills(input: &TaskStateInput) -> SkillFit {
380    SkillFit {
381        enabled_count: input.enabled_skill_count,
382        matching_count: input.matching_skill_count,
383        missing_skills: input.missing_skills.clone(),
384    }
385}
386
387fn assess_behavioral_history(input: &TaskStateInput) -> BehavioralHistory {
388    let skeletons = &input.recent_response_skeletons;
389
390    // Detect structural repetition: 3+ consecutive identical skeletons
391    let (repetition_streak, repeated_pattern) = if skeletons.len() >= 3 {
392        let last = &skeletons[skeletons.len() - 1];
393        let streak = skeletons.iter().rev().take_while(|s| s == &last).count();
394        if streak >= 3 {
395            (streak, Some(last.clone()))
396        } else {
397            (1, None)
398        }
399    } else {
400        (0, None)
401    };
402    let structural_repetition = repetition_streak >= 3;
403
404    // Detect declining engagement: user messages getting shorter over 3+ turns
405    let engagement_declining = if input.recent_user_message_lengths.len() >= 3 {
406        let lens = &input.recent_user_message_lengths;
407        let recent = &lens[lens.len().saturating_sub(3)..];
408        // Declining if each message is shorter than the previous
409        recent.windows(2).all(|w| w[1] < w[0])
410            // And the most recent is short (under 30 words)
411            && *recent.last().unwrap_or(&100) < 30
412    } else {
413        false
414    };
415
416    // Detect self-echo: compute proportion of fragments present in current user message.
417    // A non-zero proportion suggests the user is mirroring the agent's own language.
418    let user_lower = input.user_content.to_ascii_lowercase();
419    let (self_echo_risk, echo_fragment) = if input.self_echo_fragments.is_empty() {
420        (0.0_f32, None)
421    } else {
422        let total = input.self_echo_fragments.len();
423        let mut first_match: Option<String> = None;
424        let matched = input
425            .self_echo_fragments
426            .iter()
427            .filter(|frag| {
428                let found = user_lower.contains(frag.to_ascii_lowercase().as_str());
429                if found && first_match.is_none() {
430                    first_match = Some((*frag).clone());
431                }
432                found
433            })
434            .count();
435        let proportion = matched as f32 / total as f32;
436        (proportion, first_match)
437    };
438
439    let variation_hint = if structural_repetition {
440        Some(format!(
441            "Your last {} responses followed the same structure ({}). \
442             Vary your response format — try a different approach.",
443            repetition_streak,
444            repeated_pattern.as_deref().unwrap_or("unknown")
445        ))
446    } else if self_echo_risk > 0.0 {
447        Some(format!(
448            "The user's message contains a phrase you recently used: \"{}\". \
449             Avoid repeating your own prior phrasing — use fresh language.",
450            echo_fragment.as_deref().unwrap_or("")
451        ))
452    } else if engagement_declining {
453        Some(
454            "The user's messages are getting shorter and more directive. \
455             This may indicate your responses aren't meeting their needs. \
456             Consider changing your approach."
457                .into(),
458        )
459    } else {
460        None
461    };
462
463    BehavioralHistory {
464        structural_repetition,
465        repetition_streak,
466        repeated_pattern,
467        engagement_declining,
468        self_echo_risk,
469        echo_fragment,
470        variation_hint,
471    }
472}
473
474/// Canonical list of physical/mechanical action verbs indicating the user is
475/// declaring an action that must be resolved, not a conversational request.
476/// Shared between `task_state::assess_declared_action` and the
477/// `DeclaredActionGuard` in `guard_registry.rs`.
478pub const ACTION_VERBS: &[&str] = &[
479    // Combat
480    "attack", "stab", "slash", "hit", "strike", "punch", "kick", "shoot", "throw",
481    // Object manipulation
482    "grab", "pull", "push", "break", "lock", "unlock", // Movement
483    "climb", "jump", "swim", "fly", // Equipment / casting
484    "cast", "equip", "draw", "sheathe", // Social (declarative, not conversational)
485    "shout", "whisper", // Stealth / theft
486    "hide", "sneak", "steal", // Defensive / evasive
487    "dodge", "block", "flee", "charge", // Physical force
488    "tackle", "wrestle", "drag", "cut", "smash", "crush", "burn", "freeze", "lift", "shove",
489];
490
491fn assess_declared_action(input: &TaskStateInput) -> DeclaredActionState {
492    let content_lower = input.user_content.to_ascii_lowercase();
493    let words: Vec<&str> = content_lower.split_whitespace().collect();
494
495    // Look for "I [verb]" or "[verb] the/my/a" patterns
496    let mut found_verb = None;
497    let mut found_target = None;
498
499    for (i, word) in words.iter().enumerate() {
500        let clean = word.trim_matches(|c: char| !c.is_alphabetic());
501        if ACTION_VERBS.contains(&clean) {
502            found_verb = Some(clean.to_string());
503
504            // Try to extract target: words after the verb until end or punctuation
505            let target_words: Vec<&str> = words[i + 1..]
506                .iter()
507                .take(6)
508                .take_while(|w| !w.ends_with('.') && !w.ends_with('!') && !w.ends_with('?'))
509                .copied()
510                .collect();
511            if !target_words.is_empty() {
512                found_target = Some(target_words.join(" "));
513            }
514            break;
515        }
516    }
517
518    match (found_verb, found_target) {
519        (Some(verb), Some(target)) => DeclaredActionState {
520            action_declared: true,
521            action: Some(DeclaredAction {
522                verb: verb.clone(),
523                target: target.clone(),
524            }),
525            // Simple heuristic: combat verbs against characters are high-consequence
526            high_consequence: [
527                "attack", "stab", "slash", "strike", "punch", "kick", "shoot", "kill", "cast",
528                "burn", "crush", "smash",
529            ]
530            .contains(&verb.as_str()),
531        },
532        _ => DeclaredActionState {
533            action_declared: false,
534            action: None,
535            high_consequence: false,
536        },
537    }
538}
539
540// ── Output history helpers (called by roboticus-api to build TaskStateInput) ──
541
/// Derive a compact structural skeleton label from an assistant response.
///
/// This is a best-effort heuristic fingerprint: it exists to notice when the
/// same *shape* of response keeps recurring, not to classify prose precisely.
///
/// The label is a `+`-joined sequence, always in this order:
///
/// 1. `dialogue` if the text contains a straight or curly double quote,
///    otherwise `narrative` (so the label is never empty);
/// 2. `list` if three or more lines start (after indentation) with `-` or
///    `*`; otherwise `options` if the text contains `1.`, `- ` or `* `
///    anywhere;
/// 3. `question` if the text, ignoring trailing whitespace, ends with `?`;
/// 4. `pause` if the text contains `...` or `…`.
pub fn response_skeleton(text: &str) -> String {
    let bullet_line_count = text
        .lines()
        .map(str::trim_start)
        .filter(|line| line.starts_with('-') || line.starts_with('*'))
        .count();
    let mentions_option_marker = ["1.", "- ", "* "]
        .iter()
        .any(|marker| text.contains(*marker));
    let contains_quote = text.contains(|c: char| matches!(c, '"' | '\u{201c}' | '\u{201d}'));
    let ends_with_question = text.trim_end().ends_with('?');
    let contains_pause = text.contains('\u{2026}') || text.contains("...");

    // The voice label is always present, so the joined result is never empty.
    let mut labels = vec![if contains_quote { "dialogue" } else { "narrative" }];

    // A real bullet list takes precedence over a mere option marker.
    if bullet_line_count >= 3 {
        labels.push("list");
    } else if mentions_option_marker {
        labels.push("options");
    }
    if ends_with_question {
        labels.push("question");
    }
    if contains_pause {
        labels.push("pause");
    }

    labels.join("+")
}
581
/// Extract notable phrases from assistant response text.
///
/// These are used as echo-detection fragments so the agent can avoid
/// reusing its own prior phrasing verbatim in subsequent turns.
///
/// The text is split into sentences on `.`, `!`, `?` and newlines. Every
/// sentence of at least 8 words contributes one fragment: the first quarter
/// of its words is skipped (to avoid trivial openers) and up to 12 of the
/// following words are joined with single spaces and ASCII-lowercased. A
/// fragment can therefore be shorter than 8 words (an 8-word sentence yields
/// 6). At most 5 fragments are returned, in sentence order.
pub fn extract_echo_fragments(text: &str) -> Vec<String> {
    /// Sentences shorter than this contribute no fragment.
    const MIN_SENTENCE_WORDS: usize = 8;
    /// Upper bound on the number of words in one fragment.
    const MAX_FRAGMENT_WORDS: usize = 12;
    /// Cap on fragments per response, to limit prompt bloat.
    const MAX_FRAGMENTS: usize = 5;

    let mut fragments: Vec<String> = Vec::new();
    for sentence in text.split(['.', '!', '?', '\n']) {
        if fragments.len() == MAX_FRAGMENTS {
            break;
        }
        let tokens: Vec<&str> = sentence.split_whitespace().collect();
        if tokens.len() < MIN_SENTENCE_WORDS {
            continue;
        }
        // Skip the opening quarter, then take a bounded window.
        let opener_len = tokens.len() / 4;
        let window: Vec<&str> = tokens
            .iter()
            .skip(opener_len)
            .take(MAX_FRAGMENT_WORDS)
            .copied()
            .collect();
        fragments.push(window.join(" ").to_ascii_lowercase());
    }
    fragments
}
603
604// ── Tests ────────────────────────────────────────────────────────────
605
606#[cfg(test)]
607mod tests {
608    use super::*;
609
610    fn base_input() -> TaskStateInput {
611        TaskStateInput {
612            user_content: "test message".into(),
613            intents: vec![],
614            authority: "SelfGenerated".into(),
615            retrieval_metrics: None,
616            tool_search_stats: None,
617            mcp_tools_available: false,
618            taskable_agent_count: 0,
619            fit_agent_count: 0,
620            fit_agent_names: vec![],
621            enabled_skill_count: 0,
622            matching_skill_count: 0,
623            missing_skills: vec![],
624            remaining_budget_tokens: 8000,
625            provider_breaker_open: false,
626            inference_mode: "standard".into(),
627            decomposition_proposal: None,
628            explicit_specialist_workflow: false,
629            named_tool_match: false,
630            recent_response_skeletons: vec![],
631            recent_user_message_lengths: vec![],
632            self_echo_fragments: vec![],
633            declared_action: None,
634            previous_turn_had_protocol_issues: false,
635            normalization_retry_streak: 0,
636        }
637    }
638
639    #[test]
640    fn conversation_classification_with_no_task_intents() {
641        let input = base_input();
642        let state = synthesize(&input);
643        assert_eq!(state.classification, TaskClassification::Conversation);
644    }
645
646    #[test]
647    fn structural_repetition_detected_after_3_identical_skeletons() {
648        let mut input = base_input();
649        input.recent_response_skeletons = vec![
650            "narrative+question+options".into(),
651            "narrative+question+options".into(),
652            "narrative+question+options".into(),
653        ];
654        let state = synthesize(&input);
655        assert!(state.behavioral_history.structural_repetition);
656        assert_eq!(state.behavioral_history.repetition_streak, 3);
657        assert!(state.behavioral_history.variation_hint.is_some());
658    }
659
660    #[test]
661    fn no_repetition_with_varied_skeletons() {
662        let mut input = base_input();
663        input.recent_response_skeletons = vec![
664            "narrative+question+options".into(),
665            "dialogue+pause".into(),
666            "narrative+question+options".into(),
667        ];
668        let state = synthesize(&input);
669        assert!(!state.behavioral_history.structural_repetition);
670    }
671
672    #[test]
673    fn engagement_declining_detected() {
674        let mut input = base_input();
675        input.recent_user_message_lengths = vec![50, 30, 10];
676        let state = synthesize(&input);
677        assert!(state.behavioral_history.engagement_declining);
678    }
679
680    #[test]
681    fn self_echo_risk_detected_when_user_repeats_agent_phrase() {
682        let mut input = base_input();
683        input.user_content = "You said the ancient stone door is the only way forward".into();
684        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
685        let state = synthesize(&input);
686        assert!(state.behavioral_history.self_echo_risk > 0.0);
687        assert!(state.behavioral_history.echo_fragment.is_some());
688        assert!(state.behavioral_history.variation_hint.is_some());
689    }
690
691    #[test]
692    fn self_echo_risk_not_detected_without_match() {
693        let mut input = base_input();
694        input.user_content = "What happens next?".into();
695        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
696        let state = synthesize(&input);
697        assert_eq!(state.behavioral_history.self_echo_risk, 0.0);
698        assert!(state.behavioral_history.echo_fragment.is_none());
699    }
700
701    #[test]
702    fn structural_repetition_takes_priority_over_echo_in_variation_hint() {
703        let mut input = base_input();
704        input.recent_response_skeletons = vec![
705            "narrative+question+options".into(),
706            "narrative+question+options".into(),
707            "narrative+question+options".into(),
708        ];
709        input.user_content = "the ancient stone door is the only way forward right?".into();
710        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
711        let state = synthesize(&input);
712        // Both conditions true, but structural repetition hint wins
713        assert!(state.behavioral_history.structural_repetition);
714        assert!(state.behavioral_history.self_echo_risk > 0.0);
715        let hint = state.behavioral_history.variation_hint.unwrap();
716        assert!(hint.contains("structure"));
717    }
718
719    #[test]
720    fn declared_action_detected_for_combat_verb() {
721        let mut input = base_input();
722        input.user_content = "I attack the goblin with my sword".into();
723        let state = synthesize(&input);
724        assert!(state.declared_action.action_declared);
725        assert_eq!(
726            state.declared_action.action.as_ref().unwrap().verb,
727            "attack"
728        );
729        assert!(state.declared_action.high_consequence);
730    }
731
732    #[test]
733    fn declared_action_not_detected_for_question() {
734        let mut input = base_input();
735        input.user_content = "What can I see in the room?".into();
736        let state = synthesize(&input);
737        assert!(!state.declared_action.action_declared);
738    }
739
740    #[test]
741    fn task_classification_with_execution_intent() {
742        let mut input = base_input();
743        input.intents = vec!["Execution".into()];
744        let state = synthesize(&input);
745        assert_eq!(state.classification, TaskClassification::Task);
746    }
747
748    #[test]
749    fn task_classification_with_explicit_workflow() {
750        let mut input = base_input();
751        input.explicit_specialist_workflow = true;
752        let state = synthesize(&input);
753        assert_eq!(state.classification, TaskClassification::Task);
754    }
755
756    #[test]
757    fn memory_confidence_with_no_retrieval() {
758        let input = base_input();
759        let state = synthesize(&input);
760        assert_eq!(state.memory_confidence.retrieval_count, 0);
761        assert!(!state.memory_confidence.recall_gap);
762    }
763
764    #[test]
765    fn memory_confidence_detects_recall_gap() {
766        let mut input = base_input();
767        input.retrieval_metrics = Some(RetrievalMetrics {
768            retrieval_count: 2,
769            retrieval_hit: true,
770            avg_similarity: 0.4,
771            budget_utilization: 0.3,
772            tiers: crate::retrieval::MemoryTierBreakdown {
773                working: 1,
774                episodic: 1,
775                semantic: 0,
776                procedural: 0,
777                relationship: 0,
778            },
779        });
780        let state = synthesize(&input);
781        assert!(state.memory_confidence.recall_gap);
782        assert_eq!(state.memory_confidence.empty_tiers.len(), 3);
783    }
784
785    #[test]
786    fn runtime_pressure_at_low_budget() {
787        let mut input = base_input();
788        input.remaining_budget_tokens = 1500;
789        let state = synthesize(&input);
790        assert!(state.runtime_constraints.budget_pressured);
791    }
792
793    #[test]
794    fn runtime_no_pressure_at_normal_budget() {
795        let input = base_input();
796        let state = synthesize(&input);
797        assert!(!state.runtime_constraints.budget_pressured);
798    }
799
800    #[test]
801    fn tool_fit_from_search_stats() {
           // Feeds tool-search stats plus the MCP flag into `synthesize`.
           // The expected `available_count` (12) equals `candidates_selected`,
           // not `candidates_considered` (30); `token_savings` and the MCP flag
           // are expected to come through with the values set here.
802        let mut input = base_input();
803        input.tool_search_stats = Some(ToolSearchStats {
804            candidates_considered: 30,
805            candidates_selected: 12,
806            candidates_pruned: 18,
807            token_savings: 4500,
808            top_scores: vec![],
809            embedding_status: "ok".into(),
810        });
811        input.mcp_tools_available = true;
812        let state = synthesize(&input);
813        assert_eq!(state.tool_fit.available_count, 12);
814        assert_eq!(state.tool_fit.token_savings, 4500);
815        assert!(state.tool_fit.mcp_available);
816    }
817
818    #[test]
819    fn roster_fit_reflects_input() {
           // The roster counts and the explicit-workflow flag set on the input
           // are expected to appear with the same values in `roster_fit`.
           // `fit_agent_names` is populated but not asserted on.
820        let mut input = base_input();
821        input.taskable_agent_count = 5;
822        input.fit_agent_count = 2;
823        input.fit_agent_names = vec!["research-specialist".into(), "code-specialist".into()];
824        input.explicit_specialist_workflow = true;
825        let state = synthesize(&input);
826        assert_eq!(state.roster_fit.taskable_count, 5);
827        assert_eq!(state.roster_fit.fit_count, 2);
828        assert!(state.roster_fit.explicit_workflow);
829    }
830
831    #[test]
832    fn skill_fit_tracks_missing() {
           // Two missing skills go in; the test checks only that two come out in
           // `skill_fit.missing_skills`. The enabled/matching counts are set
           // but not asserted on.
833        let mut input = base_input();
834        input.enabled_skill_count = 10;
835        input.matching_skill_count = 3;
836        input.missing_skills = vec!["dnd-rules".into(), "combat-tracker".into()];
837        let state = synthesize(&input);
838        assert_eq!(state.skill_fit.missing_skills.len(), 2);
839    }
840
841    // ── Skeleton extraction tests ─────────────────────────────────────
842
843    #[test]
844    fn response_skeleton_prose_only() {
           // A single declarative sentence (no question mark, quotes, or bullet
           // lines) is expected to produce the bare "narrative" skeleton.
845        let text = "The adventurer walks down the long corridor.";
846        assert_eq!(response_skeleton(text), "narrative");
847    }
848
849    #[test]
850    fn response_skeleton_with_question() {
           // A lone question is expected to be labelled "narrative+question".
851        let text = "What do you want to do next?";
852        assert_eq!(response_skeleton(text), "narrative+question");
853    }
854
855    #[test]
856    fn response_skeleton_dialogue_with_pause() {
           // Quoted speech followed by an ellipsis is expected to be labelled
           // "dialogue+pause".
           // NOTE(review): presumably the quotes drive `dialogue` and the `...`
           // drives `pause` — confirm against `response_skeleton`.
857        let text = "\"You shall not pass,\" the wizard said... stepping forward.";
858        assert_eq!(response_skeleton(text), "dialogue+pause");
859    }
860
861    #[test]
862    fn response_skeleton_narrative_list_question() {
           // An intro line, three `- ` bullet lines, and a closing question are
           // expected to be labelled "narrative+list+question".
863        let text =
864            "You see three paths:\n- North road\n- East gate\n- West forest\nWhich do you take?";
865        assert_eq!(response_skeleton(text), "narrative+list+question");
866    }
867
868    #[test]
869    fn response_skeleton_options_without_enough_bullets() {
870        // Only 2 bullet lines — counts as "options", not "list"
           // (the three-bullet input in `response_skeleton_narrative_list_question`
           // is the one expected to be labelled "list").
871        let text = "You can:\n- Fight the guard\n- Sneak past them";
872        assert_eq!(response_skeleton(text), "narrative+options");
873    }
874
875    #[test]
876    fn extract_echo_fragments_returns_empty_for_short_text() {
           // A two-word input is expected to yield no fragments at all. The
           // minimum length that does yield fragments is not visible in this test.
877        let frags = extract_echo_fragments("Short text.");
878        assert!(frags.is_empty());
879    }
880
881    #[test]
882    fn extract_echo_fragments_yields_window_from_long_sentence() {
           // One long sentence must produce at least one fragment.
883        let text = "The ancient stone door stood resolute at the end of the corridor blocking all passage forward.";
884        let frags = extract_echo_fragments(text);
885        assert!(!frags.is_empty());
886        // Each fragment should be lowercase
           // (checked via `to_ascii_lowercase`, so only ASCII uppercase letters
           // would make this fail).
887        for frag in &frags {
888            assert_eq!(frag.to_ascii_lowercase(), *frag);
889        }
890    }
891
892    #[test]
893    fn extract_echo_fragments_caps_at_five_per_call() {
894        // Construct a response with many long sentences
           // (20 copies of the same sentence joined with ". ").
895        let sentence =
896            "The warrior raised his sword and struck the enemy down with great force and fury";
897        let text = (0..20).map(|_| sentence).collect::<Vec<_>>().join(". ");
898        let frags = extract_echo_fragments(&text);
           // Upper bound only: an empty result would also satisfy this assertion.
899        assert!(frags.len() <= 5);
900    }
901
902    // ── Pattern detection tests ───────────────────────────────────────
903
904    #[test]
905    fn pattern_locked_false_when_fewer_than_3_skeletons() {
           // Two identical skeletons are supplied; the test expects no
           // structural repetition to be flagged and a streak of 0.
906        let mut input = base_input();
907        input.recent_response_skeletons =
908            vec!["narrative+question".into(), "narrative+question".into()];
909        let state = synthesize(&input);
910        assert!(!state.behavioral_history.structural_repetition);
911        assert_eq!(state.behavioral_history.repetition_streak, 0);
912    }
913
914    #[test]
915    fn pattern_locked_true_with_4_identical_skeletons() {
           // Four identical skeletons are supplied; the test expects repetition
           // to be flagged, a streak of 4, and the repeated skeleton string to be
           // reported back in `repeated_pattern`.
916        let mut input = base_input();
917        input.recent_response_skeletons = vec![
918            "dialogue+pause".into(),
919            "dialogue+pause".into(),
920            "dialogue+pause".into(),
921            "dialogue+pause".into(),
922        ];
923        let state = synthesize(&input);
924        assert!(state.behavioral_history.structural_repetition);
925        assert_eq!(state.behavioral_history.repetition_streak, 4);
926        assert_eq!(
927            state.behavioral_history.repeated_pattern.as_deref(),
928            Some("dialogue+pause")
929        );
930    }
931
932    #[test]
933    fn pattern_locked_false_when_last_two_differ() {
           // Three skeletons are supplied, but the final one differs from the two
           // before it; the test expects no structural repetition to be flagged.
934        let mut input = base_input();
935        input.recent_response_skeletons = vec![
936            "narrative+question".into(),
937            "narrative+question".into(),
938            "dialogue+pause".into(), // different final skeleton breaks the streak
939        ];
940        let state = synthesize(&input);
941        assert!(!state.behavioral_history.structural_repetition);
942    }
943
944    #[test]
945    fn self_echo_risk_proportion_computed_correctly() {
946        let mut input = base_input();
947        // 2 fragments, user message contains exactly 1 of them
948        input.self_echo_fragments = vec![
949            "the ancient stone door is the only way forward".into(),
950            "a completely different phrase that is not in message".into(),
951        ];
           // The first fragment appears verbatim (already lowercase) inside the
           // message below; the second does not appear at all.
952        input.user_content =
953            "You said the ancient stone door is the only way forward right?".into();
954        let state = synthesize(&input);
955        // 1 of 2 fragments matched → 0.5
           // Compared with a 0.001 tolerance rather than exact float equality.
956        assert!((state.behavioral_history.self_echo_risk - 0.5).abs() < 0.001);
957    }
958
959    #[test]
960    fn self_echo_risk_zero_when_no_fragments() {
           // With an empty fragment list the echo risk is expected to be exactly
           // 0.0 (exact float comparison, no tolerance).
961        let mut input = base_input();
962        input.self_echo_fragments = vec![];
963        input.user_content = "What should I do?".into();
964        let state = synthesize(&input);
965        assert_eq!(state.behavioral_history.self_echo_risk, 0.0);
966    }
967
968    #[test]
969    fn user_engagement_declining_false_when_not_monotonic() {
970        let mut input = base_input();
971        // Not monotonically decreasing: the sequence rises from 30 to 40 before dropping to 20
972        input.recent_user_message_lengths = vec![50, 30, 40, 20];
973        let state = synthesize(&input);
974        assert!(!state.behavioral_history.engagement_declining);
975    }
976}