ainl-semantic-tagger 0.1.9

Deterministic semantic tagging and normalization for AINL / ArmaraOS agents
Documentation
//! Tool name → canonical semantic tool tags.

use crate::tag::{SemanticTag, TagNamespace};

/// Normalizes an arbitrary tool name into a lowercase, underscore-separated slug.
///
/// The input is trimmed and lowercased; every character that is not ASCII
/// alphanumeric afterwards is treated as a separator. Runs of separators collapse
/// to a single `_`, and no `_` is left at the start or end of the result.
///
/// Returns an empty string when the input contains no ASCII alphanumeric
/// characters at all.
fn sanitize_tool_slug(name: &str) -> String {
    let lowered = name.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());

    for ch in lowered.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch);
        } else if !slug.is_empty() && !slug.ends_with('_') {
            // Emit at most one separator per run, and never a leading one.
            slug.push('_');
        }
    }

    // The loop can leave exactly one trailing separator behind.
    if slug.ends_with('_') {
        slug.pop();
    }
    slug
}

/// Resolves one raw tool name to a [`TagNamespace::Tool`] tag.
///
/// The name is trimmed and ASCII-lowercased, then checked against the canonical
/// rules below. A hit yields the canonical value with confidence `0.85`. A miss
/// yields the sanitized slug of the trimmed name (or `"unknown"` when that slug is
/// empty) with confidence `0.5`.
fn map_tool_name(raw: &str) -> SemanticTag {
    // Confidence for names recognized by a canonical rule.
    const CANONICAL_CONFIDENCE: f32 = 0.85;
    // Confidence for names that are merely sanitized and passed through.
    const FALLBACK_CONFIDENCE: f32 = 0.5;

    let trimmed = raw.trim();
    let lowered = trimmed.to_ascii_lowercase();
    let name = lowered.as_str();

    // Arms are tried top to bottom and the first hit wins. The order matters
    // because the substring rules overlap: a name containing both `python_repl`
    // and `shell` resolves to `python_repl`.
    //
    // NOTE(review): `write_file` is accepted as an alias of `file_write`, but
    // `read_file` is not an alias of `file_read` — confirm whether that asymmetry
    // is intended.
    let canonical: Option<&'static str> = match name {
        "python" | "python3" => Some("python_repl"),
        _ if name.contains("python_repl") => Some("python_repl"),
        "bash" | "sh" => Some("bash"),
        _ if name.contains("shell") => Some("bash"),
        "search_web" => Some("search_web"),
        _ if name.contains("web_search") => Some("search_web"),
        "fetch" => Some("web_fetch"),
        _ if name.contains("web_fetch") => Some("web_fetch"),
        "write_file" => Some("file_write"),
        _ if name.contains("file_write") => Some("file_write"),
        _ if name.contains("file_read") => Some("file_read"),
        _ if name.contains("mcp") => Some("mcp"),
        "cli" => Some("cli"),
        _ => None,
    };

    match canonical {
        Some(value) => SemanticTag {
            namespace: TagNamespace::Tool,
            value: value.to_string(),
            confidence: CANONICAL_CONFIDENCE,
        },
        None => {
            // Unrecognized tools are kept rather than dropped, under their slug.
            let slug = sanitize_tool_slug(trimmed);
            SemanticTag {
                namespace: TagNamespace::Tool,
                value: if slug.is_empty() {
                    "unknown".to_string()
                } else {
                    slug
                },
                confidence: FALLBACK_CONFIDENCE,
            }
        }
    }
}

/// Converts a list of tool names into canonical [`TagNamespace::Tool`] tags.
///
/// Entries that are empty or whitespace-only are skipped. Every other entry
/// produces exactly one tag, in input order. Names that match no canonical rule
/// are kept under a sanitized slug with confidence `0.5`.
pub fn tag_tool_names(tools: &[String]) -> Vec<SemanticTag> {
    tools
        .iter()
        .filter_map(|name| {
            if name.trim().is_empty() {
                None
            } else {
                Some(map_tool_name(name))
            }
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A name that merely contains `shell` resolves to the canonical `bash` tag.
    #[test]
    fn maps_shell_exec_to_bash() {
        let tags = tag_tool_names(&[String::from("shell_exec")]);
        assert_eq!(tags[0].value, "bash");
    }

    /// The exact alias `sh` resolves to the canonical `bash` tag.
    #[test]
    fn maps_sh_to_bash() {
        let tags = tag_tool_names(&[String::from("sh")]);
        assert_eq!(tags[0].value, "bash");
    }

    /// An unrecognized name is kept as its slug with the fallback confidence.
    #[test]
    fn unknown_tool_kept() {
        let tags = tag_tool_names(&[String::from("my_custom_tool")]);
        let tag = &tags[0];
        assert_eq!(tag.value, "my_custom_tool");
        assert!((tag.confidence - 0.5).abs() < f32::EPSILON);
    }
}