use std::collections::HashSet;
use std::env;
use std::fs;
use std::sync::RwLock;
use tracing::{debug, info, warn};
/// Result of classifying one line of interactive input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InputType {
    /// Input that should be treated as a shell command. Also returned for
    /// empty or whitespace-only input.
    Command,
    /// Free-form text (a question or request) rather than a command.
    NaturalLanguage,
    /// A farewell phrase such as "bye" or "さようなら".
    Goodbye,
}
/// Classifies interactive input as a command, natural language, or a goodbye.
///
/// Keeps a cache of the file names found in the `PATH` directories so that
/// classification does not touch the file system on every call.
#[derive(Debug)]
pub struct InputClassifier {
    /// Names of the regular files found in the `PATH` directories. Guarded by
    /// a lock so the whole set can be swapped through `&self`.
    path_commands: RwLock<HashSet<String>>,
}
impl InputClassifier {
/// Creates a classifier whose command cache is built from the current `PATH`.
///
/// Logs the number of cached command names at `info` level.
pub fn new() -> Self {
    let initial_cache = Self::build_path_cache();
    let cached_total = initial_cache.len();
    info!(
        cached_commands = cached_total,
        "InputClassifier initialized with PATH cache"
    );
    Self {
        path_commands: RwLock::new(initial_cache),
    }
}
/// Rebuilds the command cache from the current `PATH` and swaps it in.
///
/// The new set is built before the write lock is taken, so the lock is only
/// held for the assignment itself.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
pub fn reload_path_cache(&self) {
    let rebuilt = Self::build_path_cache();
    info!(cached_commands = rebuilt.len(), "PATH cache reloaded");
    let mut slot = self.path_commands.write().unwrap();
    *slot = rebuilt;
}
/// Returns a read guard over the cached set of command names.
///
/// The guard holds the read lock until it is dropped.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
pub fn path_commands(&self) -> std::sync::RwLockReadGuard<'_, HashSet<String>> {
    let lock_result = self.path_commands.read();
    lock_result.unwrap()
}
/// Classifies one line of user input.
///
/// Surrounding whitespace is ignored. Checks run in priority order and the
/// first match wins:
///
/// 1. empty input → [`InputType::Command`]
/// 2. goodbye phrase → [`InputType::Goodbye`]
/// 3. Jarvis trigger word → [`InputType::NaturalLanguage`]
/// 4. natural-language pattern → [`InputType::NaturalLanguage`]
/// 5. first token is a path invocation → [`InputType::Command`]
/// 6. first token is a cached `PATH` command → [`InputType::Command`]
/// 7. shell syntax present → [`InputType::Command`]
/// 8. anything else → [`InputType::NaturalLanguage`]
///
/// Every non-empty decision is logged at `debug` level with its reason.
pub fn classify(&self, input: &str) -> InputType {
    let text = input.trim();
    if text.is_empty() {
        return InputType::Command;
    }

    // Pure helper, so computing it up front does not affect the earlier checks.
    let head = Self::first_token(text);

    if Self::is_goodbye_pattern(text) {
        debug!(input = %text, reason = "goodbye_pattern", "Classified as Goodbye");
        InputType::Goodbye
    } else if self.is_jarvis_trigger(text) {
        debug!(input = %text, reason = "jarvis_trigger", "Classified as NaturalLanguage");
        InputType::NaturalLanguage
    } else if self.is_natural_language_pattern(text) {
        debug!(input = %text, reason = "nl_pattern", "Classified as NaturalLanguage");
        InputType::NaturalLanguage
    } else if Self::is_path_execution(head) {
        debug!(input = %text, first_token = %head, reason = "path_execution", "Classified as Command");
        InputType::Command
    } else if self.is_command_in_path(head) {
        debug!(input = %text, first_token = %head, reason = "path_lookup", "Classified as Command");
        InputType::Command
    } else if Self::has_shell_syntax(text) {
        debug!(input = %text, reason = "shell_syntax", "Classified as Command");
        InputType::Command
    } else {
        debug!(input = %text, reason = "default", "Classified as NaturalLanguage");
        InputType::NaturalLanguage
    }
}
/// Returns `true` when the input is a short farewell.
///
/// The input is lower-cased and any leading "jarvis"-style address is
/// removed first. Inputs of more than four whitespace-separated words are
/// never farewells. An English phrase must be the whole input or be followed
/// by a space; a Japanese phrase must be the end of the input.
fn is_goodbye_pattern(input: &str) -> bool {
    const MAX_WORDS: usize = 4;
    const ENGLISH_PHRASES: &[&str] = &[
        "bye",
        "bye bye",
        "bye-bye",
        "byebye",
        "goodbye",
        "good bye",
        "good-bye",
        "see you",
        "see ya",
        "good night",
        "goodnight",
        "farewell",
        "ciao",
    ];
    const JAPANESE_PHRASES: &[&str] = &[
        "さようなら",
        "さよなら",
        "おやすみ",
        "おやすみなさい",
        "バイバイ",
        "ばいばい",
        "じゃあね",
        "じゃね",
        "またね",
        "また明日",
        "またあとで",
        "おつかれ",
        "おつかれさま",
        "おつかれさまでした",
        "お疲れ様",
        "お疲れさま",
        "お疲れさまでした",
    ];

    let lowered = input.to_lowercase();
    let body = Self::strip_jarvis_prefix(&lowered);
    if body.split_whitespace().count() > MAX_WORDS {
        return false;
    }

    // Whole input, or the phrase followed by a space and trailing words.
    let english_hit = ENGLISH_PHRASES.iter().any(|phrase| {
        body.strip_prefix(*phrase)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with(' '))
    });

    english_hit || JAPANESE_PHRASES.iter().any(|phrase| body.ends_with(*phrase))
}
/// Removes a leading address such as `"hey jarvis, "`, `"jarvis "` or `"j,"`.
///
/// Prefixes are tried in list order and matched case-sensitively, so callers
/// pass lower-cased text. When a prefix matches, the remainder is returned
/// trimmed; otherwise `input` is returned unchanged.
fn strip_jarvis_prefix(input: &str) -> &str {
    const PREFIXES: &[&str] = &[
        "hey jarvis, ",
        "hey jarvis,",
        "hey jarvis ",
        "jarvis, ",
        "jarvis,",
        "jarvis ",
        "j, ",
        "j,",
    ];

    PREFIXES
        .iter()
        .find_map(|prefix| input.strip_prefix(*prefix))
        .map_or(input, str::trim)
}
/// Collects the names of all regular files found in the `PATH` directories.
///
/// Directories that cannot be read are skipped, as are entries whose name is
/// not valid Unicode. `fs::metadata` follows symbolic links, so a link to a
/// regular file is included. Returns an empty set (and logs a warning) when
/// `PATH` is not set.
fn build_path_cache() -> HashSet<String> {
    // `var_os` rather than `var`: a `PATH` holding non-Unicode bytes is still
    // a usable search path and must not be reported as "not set".
    let Some(path_var) = env::var_os("PATH") else {
        warn!("PATH environment variable not set, classifier will rely on heuristics only");
        return HashSet::new();
    };

    env::split_paths(&path_var)
        .filter_map(|dir| fs::read_dir(dir).ok())
        .flat_map(|entries| entries.flatten())
        .filter_map(|entry| {
            let name = entry.file_name().into_string().ok()?;
            let is_file = fs::metadata(entry.path()).is_ok_and(|meta| meta.is_file());
            is_file.then_some(name)
        })
        .collect()
}
/// Returns `true` when the input addresses the assistant by name.
///
/// Matching is case-insensitive. Recognised openers:
///
/// - `jarvis` or `hey jarvis`, ending at a word boundary. Longer identifiers
///   that merely begin with the name (`jarvish`, `jarvis-cli`, `jarvis_tool`)
///   are not triggers, so they can still be classified as commands.
/// - `j,`
/// - `j ` — only when no command named `j` is in the `PATH` cache.
fn is_jarvis_trigger(&self, input: &str) -> bool {
    let lower = input.to_lowercase();

    let addressed_by_name = ["hey jarvis", "jarvis"].iter().any(|name| {
        lower.strip_prefix(*name).is_some_and(|rest| {
            // ASCII-only boundary: Japanese text written directly after the
            // name (e.g. an honorific) must still count as addressing Jarvis.
            !rest.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-')
        })
    });

    addressed_by_name
        || lower.starts_with("j,")
        || (lower.starts_with("j ") && !self.is_command_in_path("j"))
}
/// Returns `true` when the input reads like a question or request.
///
/// Matching is case-insensitive. Three signals are checked:
///
/// - the input ends with a question mark, ASCII `?` or full-width `?`;
/// - the input contains a space and its first word is a question or request
///   starter that is not also a cached `PATH` command;
/// - the input ends with one of a fixed set of Japanese question or request
///   endings.
fn is_natural_language_pattern(&self, input: &str) -> bool {
    const QUESTION_STARTERS: &[&str] = &[
        "what", "how", "why", "where", "when", "who", "which", "can", "could", "would",
        "should", "shall", "is", "are", "was", "were", "am", "do", "does", "did", "tell",
        "explain", "describe", "show", "please", "help",
    ];
    const JAPANESE_ENDINGS: &[&str] = &[
        "して",
        "してください",
        "とは",
        "教えて",
        "ですか",
        "ますか",
        "なに",
        "何",
    ];

    let lower = input.to_lowercase();

    // Japanese input methods usually produce the full-width form.
    if lower.ends_with('?') || lower.ends_with('?') {
        return true;
    }

    let first_word = lower.split_whitespace().next().unwrap_or("");
    // A lone starter word is left to the later command checks.
    let opens_like_sentence = lower.contains(' ')
        && QUESTION_STARTERS.iter().any(|starter| *starter == first_word)
        && !self.is_command_in_path(first_word);

    opens_like_sentence || JAPANESE_ENDINGS.iter().any(|ending| lower.ends_with(*ending))
}
/// Returns `true` when the token starts with `./`, `../`, `/` or `~/`.
fn is_path_execution(first_token: &str) -> bool {
    const PATH_PREFIXES: [&str; 4] = ["./", "../", "/", "~/"];
    PATH_PREFIXES
        .iter()
        .any(|prefix| first_token.starts_with(*prefix))
}
/// Returns `true` when `token` is exactly one of the cached command names.
///
/// # Panics
///
/// Panics if the cache lock is poisoned.
fn is_command_in_path(&self, token: &str) -> bool {
    let cache = self.path_commands.read().unwrap();
    cache.contains(token)
}
/// Returns `true` when the input contains shell-specific syntax.
///
/// Recognised markers: a `|` or `;` anywhere, `&&` anywhere, a leading `$`,
/// or any whitespace-separated token that starts with an ASCII uppercase
/// letter and contains `=` (such as `FOO=bar`).
fn has_shell_syntax(input: &str) -> bool {
    let has_operator =
        input.contains("&&") || input.contains(|c: char| matches!(c, '|' | ';'));
    if has_operator || input.starts_with('$') {
        return true;
    }

    input.split_whitespace().any(|word| {
        word.starts_with(|c: char| c.is_ascii_uppercase()) && word.contains('=')
    })
}
/// Returns the first whitespace-separated token, or `""` when there is none.
fn first_token(input: &str) -> &str {
    let mut tokens = input.split_whitespace();
    tokens.next().unwrap_or_default()
}
}
/// Returns `true` when the end of an AI response contains a farewell.
///
/// Only the last three lines of the trimmed text are inspected, and matching
/// is case-insensitive substring search against a fixed list of English and
/// Japanese farewell phrases. Empty or whitespace-only text is never a
/// farewell.
pub fn is_ai_goodbye_response(text: &str) -> bool {
    const TAIL_LINES: usize = 3;
    const FAREWELL_MARKERS: &[&str] = &[
        "goodbye",
        "good bye",
        "farewell",
        "signing off",
        "until next time",
        "see you later",
        "see you soon",
        "good night",
        "take care",
        "さようなら",
        "さよなら",
        "おやすみなさい",
        "良い夜を",
        "良い一日を",
        "お疲れ様",
        "お疲れさま",
        "またお会い",
    ];

    // No marker contains a line break, so checking each tail line on its own
    // is equivalent to searching the joined tail.
    text.trim()
        .lines()
        .rev()
        .take(TAIL_LINES)
        .map(str::to_lowercase)
        .any(|line| FAREWELL_MARKERS.iter().any(|marker| line.contains(*marker)))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a classifier from the `PATH` of the machine running the tests.
    ///
    /// The command tests below therefore expect common tools such as `ls`,
    /// `cat` and `git` to be installed.
    fn test_classifier() -> InputClassifier {
        InputClassifier::new()
    }

    /// Asserts that every entry of `inputs` classifies as `expected`.
    fn assert_all(classifier: &InputClassifier, inputs: &[&str], expected: InputType) {
        for input in inputs {
            assert_eq!(classifier.classify(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn classify_simple_command() {
        let c = test_classifier();
        assert_all(&c, &["ls", "ls -la"], InputType::Command);
    }

    #[test]
    fn classify_git_commands() {
        let c = test_classifier();
        assert_all(&c, &["git status", "git log --oneline"], InputType::Command);
    }

    #[test]
    fn classify_common_commands() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "echo hello",
                "cat file.txt",
                "grep error log.txt",
                "mkdir new_dir",
            ],
            InputType::Command,
        );
    }

    #[test]
    fn classify_path_execution() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "./script.sh",
                "../bin/tool",
                "/usr/bin/python3",
                "~/bin/my_tool",
            ],
            InputType::Command,
        );
    }

    #[test]
    fn classify_pipe_and_operators() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "cat file.txt | grep error",
                "make && make test",
                "cmd1 || cmd2",
            ],
            InputType::Command,
        );
    }

    #[test]
    fn classify_variable_expansion() {
        let c = test_classifier();
        assert_eq!(c.classify("$HOME/bin/tool"), InputType::Command);
    }

    #[test]
    fn classify_jarvis_trigger() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "jarvis, help me",
                "Jarvis what is this?",
                "hey jarvis",
                "j, commit please",
            ],
            InputType::NaturalLanguage,
        );
    }

    #[test]
    fn classify_question_patterns() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "what does this error mean?",
                "how do I fix this?",
                "why did the build fail?",
                "where is the config file?",
            ],
            InputType::NaturalLanguage,
        );
    }

    #[test]
    fn classify_question_mark_ending() {
        let c = test_classifier();
        assert_all(
            &c,
            &["what's the error?", "さっきのエラーは?"],
            InputType::NaturalLanguage,
        );
    }

    #[test]
    fn classify_request_patterns() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "please explain the output",
                "help me debug this",
                "explain this error",
                "tell me about git rebase",
            ],
            InputType::NaturalLanguage,
        );
    }

    #[test]
    fn classify_japanese_patterns() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "エラーを教えて",
                "このファイルを修正して",
                "gitとは",
                "これはなんですか",
            ],
            InputType::NaturalLanguage,
        );
    }

    #[test]
    fn classify_empty_input() {
        let c = test_classifier();
        assert_all(&c, &["", " "], InputType::Command);
    }

    #[test]
    fn path_cache_contains_common_commands() {
        let c = test_classifier();
        let cache = c.path_commands();
        assert!(cache.contains("ls"), "PATH cache should contain 'ls'");
        assert!(cache.contains("cat"), "PATH cache should contain 'cat'");
    }

    #[test]
    fn path_cache_does_not_contain_nonsense() {
        let c = test_classifier();
        assert!(!c
            .path_commands()
            .contains("xyzzy_nonexistent_command_12345"));
    }

    /// `reload_path_cache` picks up an executable that is added to `PATH`
    /// after the classifier was constructed.
    ///
    /// Unix-only: it relies on `PermissionsExt` to mark the fake binary as
    /// executable.
    #[cfg(unix)]
    #[test]
    fn reload_path_cache_reflects_new_path() {
        use std::ffi::OsString;
        use std::fs;
        use std::os::unix::fs::PermissionsExt;
        use std::path::PathBuf;

        /// Restores `PATH` and removes the scratch directory on drop, so a
        /// failing assertion cannot leak either into the rest of the run.
        struct Cleanup {
            original_path: OsString,
            scratch_dir: PathBuf,
        }

        impl Drop for Cleanup {
            fn drop(&mut self) {
                // SAFETY: NOTE(review) — same caveat as the `set_var` call in
                // the test body below.
                unsafe {
                    std::env::set_var("PATH", &self.original_path);
                }
                let _ = fs::remove_dir_all(&self.scratch_dir);
            }
        }

        let c = test_classifier();
        let fake_cmd = "zzz_jarvish_test_fake_cmd_42";
        assert!(
            !c.path_commands().contains(fake_cmd),
            "fake command should not exist before reload"
        );
        assert_eq!(
            c.classify(fake_cmd),
            InputType::NaturalLanguage,
            "unknown command should be classified as NaturalLanguage"
        );

        // The process id keeps concurrent test runs from sharing a directory.
        let scratch_dir = std::env::temp_dir().join(format!(
            "jarvish_test_path_reload_{}",
            std::process::id()
        ));
        let original_path = std::env::var_os("PATH").expect("PATH must be set for this test");
        let _ = fs::remove_dir_all(&scratch_dir);
        let _cleanup = Cleanup {
            original_path: original_path.clone(),
            scratch_dir: scratch_dir.clone(),
        };

        fs::create_dir_all(&scratch_dir).expect("failed to create temp dir");
        let fake_bin = scratch_dir.join(fake_cmd);
        fs::write(&fake_bin, "#!/bin/sh\necho hello\n").expect("failed to write fake bin");
        fs::set_permissions(&fake_bin, fs::Permissions::from_mode(0o755))
            .expect("failed to set permissions");

        // `join_paths` avoids lossy `display()` formatting and a hard-coded
        // separator.
        let new_path = std::env::join_paths(
            std::iter::once(scratch_dir.clone()).chain(std::env::split_paths(&original_path)),
        )
        .expect("failed to build PATH");

        // SAFETY: NOTE(review) — `set_var` is only sound while no other
        // thread reads or writes the environment through non-`std` means.
        // The default test harness is multi-threaded, so confirm that holds
        // for this crate, or run this test with `--test-threads=1`.
        unsafe {
            std::env::set_var("PATH", &new_path);
        }

        assert_eq!(
            c.classify(fake_cmd),
            InputType::NaturalLanguage,
            "should still be NaturalLanguage before reload"
        );
        c.reload_path_cache();
        assert!(
            c.path_commands().contains(fake_cmd),
            "fake command should be in cache after reload"
        );
        assert_eq!(
            c.classify(fake_cmd),
            InputType::Command,
            "should be classified as Command after reload"
        );
        // `_cleanup` restores PATH and deletes the scratch directory here.
    }

    #[test]
    fn classify_apostrophe_input() {
        let c = test_classifier();
        assert_eq!(c.classify("I'm tired, Jarvis"), InputType::NaturalLanguage);
    }

    #[test]
    fn classify_semicolon_command() {
        let c = test_classifier();
        assert_eq!(c.classify("echo hello; echo world"), InputType::Command);
    }

    #[test]
    fn classify_goodbye_english() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "bye",
                "Bye",
                "BYE",
                "bye bye",
                "bye-bye",
                "goodbye",
                "Goodbye",
                "good bye",
                "farewell",
                "see you",
                "see ya",
                "good night",
                "goodnight",
                "ciao",
            ],
            InputType::Goodbye,
        );
    }

    #[test]
    fn classify_goodbye_japanese() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "さようなら",
                "さよなら",
                "おやすみ",
                "おやすみなさい",
                "バイバイ",
                "ばいばい",
                "じゃあね",
                "じゃね",
                "またね",
                "また明日",
                "おつかれ",
                "おつかれさま",
                "おつかれさまでした",
                "お疲れ様",
            ],
            InputType::Goodbye,
        );
    }

    #[test]
    fn classify_goodbye_with_jarvis_prefix() {
        let c = test_classifier();
        assert_all(
            &c,
            &[
                "jarvis, goodbye",
                "Jarvis goodbye",
                "hey jarvis, bye",
                "j, bye",
                "jarvis, おやすみ",
            ],
            InputType::Goodbye,
        );
    }

    #[test]
    fn classify_goodbye_with_trailing_words() {
        let c = test_classifier();
        assert_all(
            &c,
            &["bye jarvis", "goodbye sir", "see you later"],
            InputType::Goodbye,
        );
    }

    /// Longer sentences that merely mention a farewell are not goodbyes.
    #[test]
    fn classify_goodbye_false_positives() {
        let c = test_classifier();
        for input in [
            "say goodbye to the old config file and update",
            "echo goodbye world from here today",
        ] {
            assert_ne!(c.classify(input), InputType::Goodbye, "input: {input:?}");
        }
    }

    #[test]
    fn ai_goodbye_response_english() {
        for text in [
            "Goodbye, sir. It was a pleasure.",
            "I've completed the task.\nFarewell, sir.",
            "That's all done.\nUntil next time, sir.",
            "Take care, sir. Signing off.",
        ] {
            assert!(is_ai_goodbye_response(text), "text: {text:?}");
        }
    }

    #[test]
    fn ai_goodbye_response_japanese() {
        for text in [
            "承知しました。さようなら。",
            "タスクは完了しました。\nおやすみなさい。",
            "お疲れ様でした。良い一日を。",
        ] {
            assert!(is_ai_goodbye_response(text), "text: {text:?}");
        }
    }

    #[test]
    fn ai_goodbye_response_not_goodbye() {
        for text in [
            "",
            "Here is the command you need: ls -la",
            "エラーの原因はこちらです。",
        ] {
            assert!(!is_ai_goodbye_response(text), "text: {text:?}");
        }
    }

    /// A farewell outside the last three lines is ignored.
    #[test]
    fn ai_goodbye_response_only_checks_tail() {
        let long_response = "Goodbye was mentioned here.\n\
            Line 2\n\
            Line 3\n\
            Line 4\n\
            Line 5\n\
            This is just a normal response.";
        assert!(!is_ai_goodbye_response(long_response));
    }
}