#![allow(clippy::module_name_repetitions)]
use alloc::format;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
use core::convert::TryFrom;
pub use super::arithmetic::evaluate_fallback_formatted;
pub use super::language::{detect as detect_language, Language};
/// Canonicalizes a prompt into lowercase words separated by single spaces.
///
/// Every character accepted by `is_unicode_letter_or_digit` is lowercased and
/// kept; each run of any other characters collapses into one space. The result
/// never starts or ends with a space.
#[must_use]
pub fn normalize_prompt(prompt: &str) -> String {
    let mut normalized = String::with_capacity(prompt.len());
    // True while a word is being built, i.e. a separator is owed before the
    // next word can start. Starting at `false` drops leading separators.
    let mut in_word = false;
    for ch in prompt.chars() {
        if !is_unicode_letter_or_digit(ch) {
            if in_word {
                normalized.push(' ');
                in_word = false;
            }
            continue;
        }
        // Lowercasing may expand one character into several.
        normalized.extend(ch.to_lowercase());
        in_word = true;
    }
    // A separator emitted after the final word has nothing left to separate.
    if normalized.ends_with(' ') {
        normalized.pop();
    }
    normalized
}
/// Splits a prompt into its normalized word tokens.
///
/// The prompt is first passed through `normalize_prompt`; the result is cut at
/// spaces and empty pieces are discarded, so a prompt without any word
/// characters yields an empty vector.
#[must_use]
pub fn tokenize_prompt(prompt: &str) -> Vec<String> {
    let normalized = normalize_prompt(prompt);
    let mut tokens = Vec::new();
    for piece in normalized.split(' ') {
        if !piece.is_empty() {
            tokens.push(String::from(piece));
        }
    }
    tokens
}
/// Evaluates an arithmetic expression through `evaluate_fallback_formatted`.
///
/// # Errors
///
/// Whenever `evaluate_fallback_formatted` fails, its error is returned
/// rendered as text via `to_string()`.
pub fn evaluate_arithmetic_expression(expression: &str) -> Result<String, String> {
    match evaluate_fallback_formatted(expression) {
        Ok(formatted) => Ok(formatted),
        Err(err) => Err(err.to_string()),
    }
}
/// Builds a deterministic identifier of the form `{prefix}_{hash}`.
///
/// `hash` is the 64-bit FNV-1a digest of the UTF-8 bytes of `text`, rendered
/// as 16 zero-padded lowercase hexadecimal digits.
#[must_use]
pub fn stable_id(prefix: &str, text: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a step: xor the byte in first, then multiply (wrapping) by the prime.
    let digest = text.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{prefix}_{digest:016x}")
}
/// English opener sentences for replies to a prompt that was not understood.
///
/// `unknown_openers_for` returns this pool for every language code other than
/// `"ru"`, `"hi"` and `"zh"`, so it doubles as the default.
const UNKNOWN_OPENERS_EN: &[&str] = &[
"I don't know how to answer that yet.",
"I didn't understand you.",
"I'm not sure how to respond to that yet.",
"I haven't learned to answer that yet.",
"That one is new to me.",
];
/// Russian opener sentences for replies to a prompt that was not understood.
///
/// Returned by `unknown_openers_for` for the language code `"ru"`.
const UNKNOWN_OPENERS_RU: &[&str] = &[
"Я пока не знаю, как ответить на это.",
"Я тебя не понял.",
"Я не уверен, как на это ответить.",
"Я ещё не научился отвечать на это.",
"Это для меня новое.",
];
/// Hindi opener sentences for replies to a prompt that was not understood.
///
/// Returned by `unknown_openers_for` for the language code `"hi"`.
const UNKNOWN_OPENERS_HI: &[&str] = &[
"मुझे अभी इसका उत्तर देना नहीं आता।",
"मैं समझ नहीं पाया।",
"मुझे यकीन नहीं है कि कैसे उत्तर दूँ।",
"मैंने अभी तक यह उत्तर देना नहीं सीखा।",
"यह मेरे लिए नया है।",
];
/// Chinese opener sentences for replies to a prompt that was not understood.
///
/// Returned by `unknown_openers_for` for the language code `"zh"`.
const UNKNOWN_OPENERS_ZH: &[&str] = &[
"我还不知道如何回答这个问题。",
"我不太明白你说的意思。",
"我不确定该如何回答。",
"我还没有学会回答这个问题。",
"这对我来说是新的。",
];
/// Returns the pool of "unknown prompt" openers for a language code.
///
/// `"ru"`, `"hi"` and `"zh"` select their dedicated pools. Every other value,
/// including `"en"`, gets the English pool. The comparison is exact and
/// case-sensitive.
#[must_use]
pub fn unknown_openers_for(language: &str) -> &'static [&'static str] {
    if language == "ru" {
        UNKNOWN_OPENERS_RU
    } else if language == "hi" {
        UNKNOWN_OPENERS_HI
    } else if language == "zh" {
        UNKNOWN_OPENERS_ZH
    } else {
        UNKNOWN_OPENERS_EN
    }
}
/// Picks one "unknown prompt" opener, deterministically per prompt.
///
/// A blank (empty or whitespace-only) prompt always gets the first opener of
/// the language's pool. Otherwise the trimmed prompt is hashed through
/// `stable_id` and the hash, reduced modulo the pool size, chooses the entry,
/// so the same prompt and language always produce the same opener.
#[must_use]
pub fn select_unknown_opener(prompt: &str, language: &str) -> &'static str {
    let openers = unknown_openers_for(language);
    debug_assert!(!openers.is_empty(), "unknown opener pool must be non-empty");

    let key = prompt.trim();
    if key.is_empty() {
        return openers[0];
    }

    // `stable_id` yields "<prefix>_<hex digest>"; the digest is whatever
    // follows the last underscore. Anything unparsable degrades to 0.
    let id = stable_id("unknown_opener", key);
    let digest = id
        .rsplit('_')
        .next()
        .map_or(0, |hex| u64::from_str_radix(hex, 16).unwrap_or(0));

    let pool_size = openers.len() as u64;
    let slot = usize::try_from(digest % pool_size).unwrap_or(0);
    openers[slot]
}
#[must_use]
pub fn matches_intent_route_parts(
normalized_prompt: &str,
raw_prompt: &str,
keywords: &[String],
phrases: &[String],
tokens: &[String],
combos: &[Vec<String>],
) -> bool {
if keywords
.iter()
.any(|keyword| normalized_prompt == keyword || raw_prompt == keyword)
{
return true;
}
if phrases
.iter()
.any(|phrase| normalized_prompt == phrase || raw_prompt == phrase)
{
return true;
}
if tokens
.iter()
.any(|token| contains_route_token(normalized_prompt, token))
{
return true;
}
combos.iter().any(|combo| {
!combo.is_empty()
&& combo
.iter()
.all(|token| contains_route_token(normalized_prompt, token))
})
}
/// Evaluates an intent route encoded as a line-oriented text payload.
///
/// Payload layout:
///
/// * line 1 — the normalized prompt, used verbatim;
/// * line 2 — the raw prompt, passed through `normalize_route_raw_prompt`
///   before matching;
/// * every further line — a tab-separated record whose first field is the
///   record kind: `K` (keyword), `P` (phrase) or `T` (token) followed by one
///   value, or `C` (combo) followed by any number of tokens.
///
/// Records of any other kind, `K`/`P`/`T` records without a value field, and
/// combos without a non-empty token are ignored. Missing prompt lines are
/// treated as empty strings. The collected matchers are handed to
/// `matches_intent_route_parts`, whose verdict is returned.
#[must_use]
pub fn matches_intent_route_payload(payload: &str) -> bool {
    let mut lines = payload.lines();
    let normalized = lines.next().unwrap_or("");
    let raw = normalize_route_raw_prompt(lines.next().unwrap_or(""));

    let mut keywords: Vec<String> = Vec::new();
    let mut phrases: Vec<String> = Vec::new();
    let mut tokens: Vec<String> = Vec::new();
    let mut combos: Vec<Vec<String>> = Vec::new();

    for record in lines {
        let mut fields = record.split('\t');
        // Single-value kinds share one code path; they differ only in the
        // list that receives the value.
        let single_valued = match fields.next() {
            Some("K") => &mut keywords,
            Some("P") => &mut phrases,
            Some("T") => &mut tokens,
            Some("C") => {
                let combo: Vec<String> = fields
                    .filter(|field| !field.is_empty())
                    .map(String::from)
                    .collect();
                if !combo.is_empty() {
                    combos.push(combo);
                }
                continue;
            }
            _ => continue,
        };
        if let Some(value) = fields.next() {
            single_valued.push(String::from(value));
        }
    }

    matches_intent_route_parts(normalized, &raw, &keywords, &phrases, &tokens, &combos)
}
/// Reports whether `expected` is exactly one of the whitespace-separated words
/// of `normalized_prompt`.
///
/// Matching is whole-word and case-sensitive; an `expected` that is empty or
/// itself contains whitespace can never match.
fn contains_route_token(normalized_prompt: &str, expected: &str) -> bool {
    for word in normalized_prompt.split_whitespace() {
        if word == expected {
            return true;
        }
    }
    false
}
/// Prepares a raw prompt for exact keyword/phrase comparison.
///
/// The prompt is lowercased character by character, surrounding whitespace is
/// removed, and a run of trailing sentence punctuation is dropped. Whitespace
/// is trimmed once more afterwards, so `"Why ?"` becomes `"why"`.
///
/// The stripped set covers ASCII punctuation, the ideographic full stop and
/// its fullwidth companions (`？ ！ ， ； ：`), and the Devanagari danda
/// (`।`), so Chinese and Hindi prompts lose their terminator the same way
/// English ones do. Punctuation inside the prompt is left untouched.
fn normalize_route_raw_prompt(prompt: &str) -> String {
    const TRAILING_PUNCTUATION: &[char] = &[
        // ASCII
        '?', '.', '!', ',', ';', ':',
        // CJK ideographic full stop and fullwidth forms
        '。', '？', '！', '，', '；', '：',
        // Devanagari danda
        '।',
    ];

    // Per-character lowercasing (not `str::to_lowercase`) keeps the result
    // independent of a character's position within a word.
    let lowered: String = prompt.chars().flat_map(char::to_lowercase).collect();
    lowered
        .trim()
        .trim_end_matches(TRAILING_PUNCTUATION)
        .trim()
        .to_string()
}
/// Classifies a character as word content for prompt normalization.
///
/// Returns `true` for ASCII letters and digits and for code points inside a
/// fixed set of script blocks (Latin with diacritics, Greek, Cyrillic,
/// Devanagari, kana, Bopomofo, CJK ideographs, Hangul syllables). Combining
/// signs inside those blocks, such as Devanagari vowel signs and the virama,
/// count as word content so that words are not split in the middle.
///
/// Punctuation and math symbols that live inside those blocks are rejected
/// explicitly: `×`, `÷`, the Greek question mark and ano teleia, the
/// Devanagari danda, double danda and abbreviation sign, and the katakana
/// double hyphen and middle dot. Without this, a sentence terminator such as
/// `।` would be glued to the preceding word.
fn is_unicode_letter_or_digit(ch: char) -> bool {
    if ch.is_ascii_alphanumeric() {
        return true;
    }
    let cp = u32::from(ch);

    // Non-letter code points that fall inside the accepted ranges below.
    let is_embedded_punctuation = matches!(
        cp,
        0x00D7 // multiplication sign
            | 0x00F7 // division sign
            | 0x037E // Greek question mark
            | 0x0387 // Greek ano teleia
            | 0x0964 // Devanagari danda
            | 0x0965 // Devanagari double danda
            | 0x0970 // Devanagari abbreviation sign
            | 0x30A0 // katakana-hiragana double hyphen
            | 0x30FB // katakana middle dot
    );
    if is_embedded_punctuation {
        return false;
    }

    matches!(
        cp,
        0x00C0..=0x024F // Latin-1 letters, Latin Extended-A and -B
            | 0x0370..=0x03FF // Greek and Coptic
            | 0x0400..=0x052F // Cyrillic and Cyrillic Supplement
            | 0x0900..=0x097F // Devanagari
            | 0x3040..=0x30FF // Hiragana and Katakana
            | 0x3100..=0x312F // Bopomofo
            | 0x3400..=0x4DBF // CJK Unified Ideographs Extension A
            | 0x4E00..=0x9FFF // CJK Unified Ideographs
            | 0xAC00..=0xD7AF // Hangul Syllables
            | 0xF900..=0xFAFF // CJK Compatibility Ideographs
    )
}