use std::collections::HashSet;
use std::sync::LazyLock;
use crate::api::routes::agent::guard_registry::requested_exact_bullet_count;
use crate::api::routes::agent::guard_registry::{
Guard, GuardContext, GuardId, GuardVerdict, first_absolute_path, guard_context_is_task_like,
};
use crate::api::routes::agent::intent_registry::Intent;
/// Names of the tools this module treats as pure introspection.
///
/// `TaskDeferralGuard::evaluate` checks every tool result of a turn against this set to decide
/// whether the turn ran nothing but introspection tools.
static INTROSPECTION_TOOLS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "get_memory_stats",
        "get_runtime_context",
        "get_subagent_status",
        "list-subagent-roster",
        "list-available-skills",
        "task-status",
        "list-open-tasks",
    ])
});
/// Names of the tools grouped as subagent-delegation tools.
///
/// Visible to the rest of `crate::api::routes::agent`; nothing in the visible part of this file
/// reads it.
pub(in crate::api::routes::agent) static DELEGATION_TOOLS: LazyLock<HashSet<&'static str>> =
    LazyLock::new(|| {
        HashSet::from([
            "delegate-subagent",
            "assign-subagent",
            "orchestrate-subagents",
            "compose-subagent",
        ])
    });
/// Names of the tools grouped as cron-scheduling tools.
///
/// Visible to the rest of `crate::api::routes::agent`; nothing in the visible part of this file
/// reads it.
pub(in crate::api::routes::agent) static CRON_TOOLS: LazyLock<HashSet<&'static str>> =
    LazyLock::new(|| {
        HashSet::from([
            "schedule-cron",
            "create-cron-job",
            "update-cron-job",
            "cron-create",
        ])
    });
/// Guard that asks for a retry when a response contains delegation/subagent phrasing that the
/// turn's delegation provenance does not back up.
pub(in crate::api::routes::agent) struct SubagentClaimGuard;

impl Guard for SubagentClaimGuard {
    fn id(&self) -> GuardId {
        GuardId::SubagentClaim
    }

    /// This guard is evaluated on every turn.
    fn is_relevant(&self, _ctx: &GuardContext) -> bool {
        true
    }

    /// Passes when the provenance flags show a started, completed subagent task with an attached
    /// result, or when `content` carries none of the delegation markers. Otherwise requests a
    /// retry that tells the agent to answer directly in its own voice.
    fn evaluate(&self, content: &str, ctx: &GuardContext) -> GuardVerdict {
        let provenance = ctx.delegation_provenance;
        let delegation_verified = provenance.subagent_task_started
            && provenance.subagent_task_completed
            && provenance.subagent_result_attached;

        // Verified delegation short-circuits the marker scan entirely.
        if delegation_verified {
            return GuardVerdict::Pass;
        }
        if !claims_unverified_subagent_output(content, ctx) {
            return GuardVerdict::Pass;
        }

        tracing::warn!(
            "guard[SubagentClaim]: blocking narrated delegation — retrying in character"
        );
        let reason = format!(
            "You narrated your intent to delegate instead of responding to the user. \
             As {agent}, respond DIRECTLY to the user's message in your own voice. \
             Do not announce delegation, do not mention subagents or specialists. \
             If you need to delegate a mechanical lookup, do it silently via tool calls — \
             never tell the user about your internal routing.",
            agent = ctx.agent_name
        );
        GuardVerdict::RetryRequested { reason }
    }
}
/// Reports whether `response` contains one of the fixed delegation / subagent-claim phrases.
///
/// Matching is an ASCII-case-insensitive substring search over the whole response.
fn claims_unverified_subagent_output(response: &str, ctx: &GuardContext<'_>) -> bool {
    const DELEGATION_PHRASES: &[&str] = &[
        "let me delegate",
        "i'll delegate",
        "i will delegate",
        "delegating to",
        "delegate the task",
        "delegate this to",
        "i have a specialist",
        "i have a lore_keeper",
        "i have a combat_referee",
        "let me hand this off",
        "handing off to",
        "routing to specialist",
        "compose a specialist",
        "[delegating to subagent",
        "came directly from the running subagent",
        "came directly from a running subagent",
        "subagent status - live",
        "standing by for tasking",
        "taskable subagents operational",
        "subagent-generated",
    ];
    const DELEGATION_VERBS: &[&str] = &["delegate", "hand off", "handing off", "routing to"];

    let haystack = response.to_ascii_lowercase();
    let phrase_hit = DELEGATION_PHRASES
        .iter()
        .any(|phrase| haystack.contains(phrase));

    // Short responses (under 100 bytes) get an extra look for delegation verbs and known
    // subagent names.
    // NOTE(review): this branch only ever returns `false` when `phrase_hit` is already `false`,
    // so the function's result always equals `phrase_hit`. Confirm whether the verb/name signals
    // were meant to influence the outcome.
    if response.len() < 100 {
        let verb_hit = DELEGATION_VERBS.iter().any(|verb| haystack.contains(verb));
        let name_hit = ctx
            .subagent_names
            .iter()
            .any(|name| haystack.contains(&name.to_ascii_lowercase()));
        if !(verb_hit || name_hit || phrase_hit) {
            return false;
        }
    }

    phrase_hit
}
/// Guard for task-like turns that ran only introspection tools and whose `TASK_DEFERRAL`
/// semantic score is high.
pub(in crate::api::routes::agent) struct TaskDeferralGuard;

impl Guard for TaskDeferralGuard {
    fn id(&self) -> GuardId {
        GuardId::TaskDeferral
    }

    /// Relevant only for task-like contexts that produced at least one tool result.
    fn is_relevant(&self, ctx: &GuardContext) -> bool {
        guard_context_is_task_like(ctx) && !ctx.tool_results.is_empty()
    }

    /// Outcomes, in order:
    /// - any non-introspection tool ran, or the `TASK_DEFERRAL` score is not above 0.7: pass;
    /// - `get_runtime_context` ran and the user prompt contains an absolute path: rewrite the
    ///   reply to a fixed "blocked" message (as a single bullet when the prompt requested an
    ///   exact bullet count);
    /// - otherwise: request a retry.
    fn evaluate(&self, _content: &str, ctx: &GuardContext) -> GuardVerdict {
        let ran_other_tools = ctx
            .tool_results
            .iter()
            .any(|(name, _)| !INTROSPECTION_TOOLS.contains(name.as_str()));
        if ran_other_tools {
            return GuardVerdict::Pass;
        }

        // A missing score counts as 0.0, i.e. "not deferred".
        let deferral_score = ctx
            .semantic_guard_scores
            .get("TASK_DEFERRAL")
            .map_or(0.0, |(score, _trust)| *score);
        if !(deferral_score > 0.7) {
            return GuardVerdict::Pass;
        }

        let runtime_introspected = ctx
            .tool_results
            .iter()
            .any(|(name, _)| name == "get_runtime_context");
        if runtime_introspected
            && let Some(path) = first_absolute_path(ctx.user_prompt)
        {
            let blocker = format!(
                "Blocked: {path} is outside my allowed runtime boundaries in this environment, so I cannot read it directly."
            );
            let reply = if requested_exact_bullet_count(ctx.user_prompt).is_some() {
                format!("- {blocker}")
            } else {
                blocker
            };
            return GuardVerdict::Rewritten(reply);
        }

        tracing::warn!(
            deferral_score,
            "guard[TaskDeferral]: introspection ended in narrated future action"
        );
        GuardVerdict::RetryRequested {
            reason: "task turn stopped at narrated next-step instead of taking action".into(),
        }
    }
}
/// Guard that keeps internal infrastructure details (subagent names, delegation bookkeeping
/// lines) out of user-facing replies.
pub(in crate::api::routes::agent) struct InternalJargonGuard;

impl Guard for InternalJargonGuard {
    fn id(&self) -> GuardId {
        GuardId::InternalJargon
    }

    /// Skipped when the turn carries the `CapabilitySummary`, `Introspection` or
    /// `ProviderInventory` intent.
    fn is_relevant(&self, ctx: &GuardContext) -> bool {
        !ctx.has_intent(Intent::CapabilitySummary)
            && !ctx.has_intent(Intent::Introspection)
            && !ctx.has_intent(Intent::ProviderInventory)
    }

    /// Requests a retry when the `NARRATED_DELEGATION` score exceeds 0.8 or when the reply
    /// mentions a subagent name the user did not mention. Otherwise strips lines beginning with
    /// "centralized delegation" / "delegation gate" and returns the cleaned text, or passes the
    /// content through untouched when no such line exists.
    fn evaluate(&self, content: &str, ctx: &GuardContext) -> GuardVerdict {
        /// True when `name` occurs in the reply but not in the user's prompt.
        ///
        /// Both haystacks are already lowercased, so the name is lowercased too; without that a
        /// name containing an uppercase letter could never match.
        fn leaks_name(name: &str, content_lower: &str, user_lower: &str) -> bool {
            let needle = name.to_ascii_lowercase();
            content_lower.contains(&needle) && !user_lower.contains(&needle)
        }

        let lower = content.to_ascii_lowercase();
        // A missing score counts as 0.0.
        let narrated_delegation_score = ctx
            .semantic_guard_scores
            .get("NARRATED_DELEGATION")
            .map(|(score, _trust)| *score)
            .unwrap_or(0.0);
        let user_lower = ctx.user_prompt.to_ascii_lowercase();
        let subagent_leak = ctx
            .subagent_names
            .iter()
            .any(|name| leaks_name(name.as_str(), &lower, &user_lower));
        tracing::debug!(
            narrated_delegation_score,
            subagent_leak,
            subagent_count = ctx.subagent_names.len(),
            matched_names = ?ctx.subagent_names.iter()
                .filter(|name| leaks_name(name.as_str(), &lower, &user_lower))
                .collect::<Vec<_>>(),
            content_len = lower.len(),
            "guard[InternalJargon]: evaluated"
        );
        if narrated_delegation_score > 0.8 || subagent_leak {
            tracing::warn!(
                narrated_delegation_score,
                subagent_leak,
                "guard[InternalJargon]: infrastructure leakage detected"
            );
            return GuardVerdict::RetryRequested {
                reason: "Your response exposed internal infrastructure details (subagent names, \
                         tool inventories, runtime state) to the user. Respond as your persona \
                         would — address the user's actual request without mentioning internal \
                         systems, tools, or subagents."
                    .into(),
            };
        }

        // Drop individual lines that start with known internal bookkeeping prefixes.
        let mut kept = Vec::new();
        let mut removed = false;
        for line in content.lines() {
            let line_lower = line.trim().to_ascii_lowercase();
            let internal = line_lower.starts_with("centralized delegation")
                || line_lower.starts_with("delegation gate");
            if internal {
                removed = true;
                continue;
            }
            kept.push(line);
        }
        if !removed {
            return GuardVerdict::Pass;
        }
        let cleaned = kept.join("\n").trim().to_string();
        if cleaned.is_empty() {
            // Every line was internal; substitute a short stand-in reply.
            return GuardVerdict::Rewritten(format!(
                "{} here. I'll keep internals out of the reply and focus on actionable results.",
                ctx.agent_name
            ));
        }
        GuardVerdict::Rewritten(cleaned)
    }
}
/// Guard that rejects replies which narrate the user's own actions, thoughts or speech.
pub(in crate::api::routes::agent) struct PerspectiveGuard;

impl Guard for PerspectiveGuard {
    fn id(&self) -> GuardId {
        GuardId::Perspective
    }

    /// This guard is evaluated on every turn.
    fn is_relevant(&self, _ctx: &GuardContext) -> bool {
        true
    }

    /// Passes for turns with the `Acknowledgement` intent and for content that
    /// `has_first_person_user_narration` does not flag; otherwise requests a retry.
    fn evaluate(&self, content: &str, ctx: &GuardContext) -> GuardVerdict {
        let is_acknowledgement = ctx.intents.contains(&Intent::Acknowledgement);
        if is_acknowledgement || !has_first_person_user_narration(content) {
            return GuardVerdict::Pass;
        }

        tracing::warn!("guard[Perspective]: first-person narration of user detected");
        GuardVerdict::RetryRequested {
            reason: "Response narrates the user's actions or thoughts in first person. \
                     Describe the world's response or use second person ('you') instead. \
                     Do not assert the user's internal states."
                .into(),
        }
    }
}
/// Heuristically detects replies that narrate on the user's behalf.
///
/// Two independent signals are checked; either one makes the function return `true`:
///
/// 1. At least two *distinct* first-person cues ("i look ", "my hand ", ...) appear in the text
///    that lies outside quotation marks. Lines that start with a quote are ignored entirely, and
///    quoted spans inside other lines are removed before matching.
/// 2. Any player-speech cue ("you say,", "you whisper", ...) appears anywhere in `content`,
///    quoted text included.
///
/// All matching is ASCII-case-insensitive substring search.
pub(super) fn has_first_person_user_narration(content: &str) -> bool {
    const FIRST_PERSON_CUES: &[&str] = &[
        "i glance ",
        "i look ",
        "i shift ",
        "i lean ",
        "i reach ",
        "i draw ",
        "i pull ",
        "i swing ",
        "i move ",
        "i walk ",
        "i run ",
        "i stand ",
        "i sit ",
        "i nod ",
        "i shake ",
        "i gesture ",
        "i turn ",
        "i feel ",
        "i think ",
        "i realize ",
        "i notice ",
        "i sense ",
        "i know ",
        "i understand ",
        "i decide ",
        "my fingers ",
        "my hand ",
        "my eyes ",
        "my heart ",
        "my gut ",
        "my sword ",
        "my blade ",
        "my armor ",
        "my weapon ",
    ];
    const PC_SPEECH_CUES: &[&str] = &[
        "you say,",
        "you say.",
        "you say ",
        "you reply,",
        "you reply.",
        "you reply ",
        "you tell ",
        "you ask,",
        "you ask.",
        "you call out",
        "you whisper",
        "you shout",
        "you mutter",
        "you growl",
        "you announce",
        "you declare",
        "you respond",
        "you answer",
    ];

    // Build the lowercase narration text: everything that sits outside quotation marks.
    let narration = content
        .lines()
        .map(str::trim)
        // A line opening with a straight or left-curly quote is treated as pure dialogue.
        .filter(|line| !matches!(line.chars().next(), Some('"' | '\u{201c}')))
        .map(|line| {
            // Every quote character flips the in/out state and is itself dropped.
            let mut inside_quote = false;
            line.chars()
                .filter(|&ch| {
                    if matches!(ch, '"' | '\u{201c}' | '\u{201d}') {
                        inside_quote = !inside_quote;
                        false
                    } else {
                        !inside_quote
                    }
                })
                .collect::<String>()
        })
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
        .to_ascii_lowercase();

    // Counts distinct cues, not occurrences: one cue repeated twice still counts once.
    let first_person_hits = FIRST_PERSON_CUES
        .iter()
        .filter(|cue| narration.contains(**cue))
        .count();
    if first_person_hits >= 2 {
        return true;
    }

    // Player-speech cues are searched in the full content, including quoted dialogue.
    let whole = content.to_ascii_lowercase();
    PC_SPEECH_CUES.iter().any(|cue| whole.contains(cue))
}
/// Guard that makes sure a reply addresses an action the user explicitly declared.
pub(in crate::api::routes::agent) struct DeclaredActionGuard;

impl Guard for DeclaredActionGuard {
    fn id(&self) -> GuardId {
        GuardId::DeclaredAction
    }

    /// Relevant only when the turn produced no tool results and the user prompt parses as a
    /// declared action.
    fn is_relevant(&self, ctx: &GuardContext) -> bool {
        ctx.tool_results.is_empty() && detect_declared_action(ctx.user_prompt).is_some()
    }

    /// Passes when the reply mentions the declared verb or target, or contains any entry of
    /// `RESOLUTION_INDICATORS` (all compared as lowercase substrings). Otherwise requests a retry
    /// that quotes the declared action.
    fn evaluate(&self, content: &str, ctx: &GuardContext) -> GuardVerdict {
        let Some((verb, target)) = detect_declared_action(ctx.user_prompt) else {
            return GuardVerdict::Pass;
        };

        let reply = content.to_ascii_lowercase();
        let mentions = |needle: &str| reply.contains(&needle.to_ascii_lowercase());
        let addressed = mentions(&verb)
            || mentions(&target)
            || RESOLUTION_INDICATORS.iter().any(|cue| reply.contains(cue));
        if addressed {
            return GuardVerdict::Pass;
        }

        tracing::warn!(
            verb = %verb,
            target = %target,
            "guard[DeclaredAction]: user declared action but output doesn't resolve it"
        );
        let reason = format!(
            "The user declared an action: '{verb} {target}'. Your response does not reference \
             or resolve this action. You must acknowledge the user's declared intent \
             and either resolve it (describe the attempt and outcome) or use your \
             out-of-character voice to surface consequences and ask for confirmation \
             before proceeding."
        );
        GuardVerdict::RetryRequested { reason }
    }
}
/// Parses `input` as a declared action and returns its `(verb, target)`, both lowercased.
///
/// The verb must be a member of `ACTION_VERBS` and must be either the first word or preceded by
/// exactly one lead-in: `i`, `i'll`, `i'm`, `please`, `let`, `let's`, `now`, `then`, `quickly`,
/// `just`, or the two-word `let me`. The target is every word after the verb, joined with single
/// spaces.
///
/// Returns `None` when the input has fewer than two words, when no verb is found in the expected
/// position, or when nothing follows the verb.
pub(in crate::api::routes::agent) fn detect_declared_action(
    input: &str,
) -> Option<(String, String)> {
    let normalized = input.to_ascii_lowercase();
    let tokens: Vec<&str> = normalized.split_whitespace().collect();
    if tokens.len() < 2 {
        return None;
    }

    // Work out where the verb is expected to sit.
    let lead = tokens[0];
    let verb_at = if ACTION_VERBS.contains(&lead) {
        0
    } else {
        match lead {
            "let" if tokens[1] == "me" => 2,
            "i" | "i'll" | "i'm" | "please" | "let" | "let's" | "now" | "then" | "quickly"
            | "just" => 1,
            _ => return None,
        }
    };

    let verb = *tokens.get(verb_at)?;
    // After a lead-in the candidate still has to be a known action verb.
    if verb_at > 0 && !ACTION_VERBS.contains(&verb) {
        return None;
    }

    let target = tokens[verb_at + 1..].join(" ");
    if target.is_empty() {
        return None;
    }
    Some((verb.to_string(), target))
}
use roboticus_agent::task_state::ACTION_VERBS;
/// Lowercase fragments whose presence in a reply counts as resolving (or explicitly deferring) a
/// user-declared action.
///
/// `DeclaredActionGuard::evaluate` matches these as plain substrings of the lowercased reply, so
/// short entries such as "hit" or "try" also match inside longer words.
const RESOLUTION_INDICATORS: &[&str] = &[
    // Dice / mechanics vocabulary.
    "roll",
    "d20",
    "dc ",
    "check",
    // Outcome vocabulary.
    "succeed",
    "fail",
    "miss",
    "hit",
    "attempt",
    "try",
    "manage",
    "unable",
    "result",
    // Confirmation / consequence prompts.
    "before we resolve",
    "before proceeding",
    "are you sure",
    "consequences",
    "what would happen",
];