use serde::{Deserialize, Serialize};
/// Coarse estimate of how much output a user query calls for.
///
/// [`classify_query`] picks a variant from the query text and
/// [`QueryComplexity::max_tokens`] maps it to a token budget.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum QueryComplexity {
/// Yes/no and single-fact questions (base budget: 64 tokens).
VeryShort,
/// Lookup-style requests such as definitions or translations (base budget: 256 tokens).
Short,
/// Explanations and summaries; also the fallback for unmatched queries (base budget: 512 tokens).
Medium,
/// Detailed or multi-part requests (base budget: 1024 tokens).
Long,
/// Code and long-form writing requests; the budget is the model's own maximum.
Unlimited,
}
impl QueryComplexity {
    /// Returns the response token budget for this complexity class.
    ///
    /// Each class has a base budget (`VeryShort` 64, `Short` 256, `Medium` 512,
    /// `Long` 1024); `Unlimited` uses `model_max` itself. The result is always
    /// capped at `model_max`, so a model with a small output limit is never
    /// asked for more tokens than it can produce.
    ///
    /// * `model_max` - the largest number of tokens the target model may emit.
    #[must_use]
    pub const fn max_tokens(self, model_max: u32) -> u32 {
        let budget = match self {
            Self::VeryShort => 64,
            Self::Short => 256,
            Self::Medium => 512,
            Self::Long => 1024,
            Self::Unlimited => model_max,
        };
        // Clamp by hand: trait methods such as `Ord::min` cannot be called
        // from a `const fn` on stable Rust.
        if budget > model_max {
            model_max
        } else {
            budget
        }
    }
}
/// Classifies a user message into a [`QueryComplexity`] bucket.
///
/// The message is lower-cased and trimmed, then checked against the rule
/// sets from most to least demanding: unlimited, long, very short, short,
/// medium. The first rule set that matches wins. A message that matches
/// nothing is `Long` when it is longer than 200 bytes and `Medium` otherwise.
#[must_use]
pub fn classify_query(message: &str) -> QueryComplexity {
    let normalized = message.to_lowercase();
    let text = normalized.trim();

    if is_unlimited(text) {
        QueryComplexity::Unlimited
    } else if is_long(text) {
        QueryComplexity::Long
    } else if is_very_short(text) {
        QueryComplexity::VeryShort
    } else if is_short(text) {
        QueryComplexity::Short
    } else if is_medium(text) {
        QueryComplexity::Medium
    } else if text.len() > 200 {
        // No phrase matched: fall back on sheer length (`len` counts bytes).
        QueryComplexity::Long
    } else {
        QueryComplexity::Medium
    }
}
/// Reports whether `query` asks for open-ended output (code or long-form text).
///
/// `query` is expected to be lower-cased already, since all phrases below are
/// lower-case. The query qualifies when any of the following holds:
/// it contains at least three question marks, it contains a numbered list of
/// at least three items, or it contains one of the code / long-form phrases.
fn is_unlimited(query: &str) -> bool {
    // Requests to produce or rework source code (English, then German).
    const CODE_REQUESTS: &[&str] = &[
        "write code",
        "write a code",
        "write the code",
        "implement",
        "create a function",
        "write a function",
        "write a program",
        "write a script",
        "write a class",
        "write a module",
        "code example",
        "code snippet",
        "refactor",
        "debug this",
        "fix this code",
        "schreibe code",
        "schreib code",
        "implementiere",
        "erstelle eine funktion",
        "schreibe eine funktion",
        "schreibe ein programm",
        "schreibe ein skript",
        "code-beispiel",
        "codebeispiel",
        "refaktoriere",
        "refactore",
        "behebe diesen fehler",
    ];
    // Requests for long written documents (English, then German).
    const LONGFORM_REQUESTS: &[&str] = &[
        "write an essay",
        "write a story",
        "write an article",
        "write a report",
        "write a tutorial",
        "write a guide",
        "write a blog",
        "generate a document",
        "full implementation",
        "research and document",
        "schreibe einen aufsatz",
        "schreibe eine geschichte",
        "schreibe einen artikel",
        "schreibe einen bericht",
        "erstelle ein dokument",
        "erstelle einen bericht",
        "erstelle eine anleitung",
        "vollständige implementierung",
        "recherchiere",
        "dokumentiere",
    ];

    let asks_many_questions = query.matches('?').count() >= 3;

    asks_many_questions
        || has_numbered_list(query, 3)
        || CODE_REQUESTS
            .iter()
            .chain(LONGFORM_REQUESTS.iter())
            .any(|phrase| query.contains(*phrase))
}
/// Reports whether `query` contains a run of at least `min_items` consecutive
/// numbered-list markers (`"1."`, `"2."`, `"3."`, ...).
///
/// Numbers 1 through 20 are probed in ascending order. Counting starts at the
/// first marker that is present and stops at the first gap after that, so the
/// run does not have to begin at 1.
///
/// A marker only counts when it stands on its own:
/// * the character before it must not be alphanumeric or `'.'`, so `"2011."`
///   is not read as item 1 and `"1.2.3."` is not read as items 2 and 3;
/// * the character after it must not be an ASCII digit, so decimals such as
///   `"3.14"` are ignored.
///
/// Plain substring matching would treat prices, version numbers and years as
/// list items and wrongly escalate ordinary queries.
fn has_numbered_list(query: &str, min_items: usize) -> bool {
    /// Highest list number that is probed.
    const MAX_MARKER: u32 = 20;

    /// True when `marker` occurs at least once as a standalone list label.
    fn has_standalone_marker(query: &str, marker: &str) -> bool {
        query.match_indices(marker).any(|(start, matched)| {
            // `match_indices` yields byte offsets on char boundaries, so
            // slicing at `start` and at the end of the match cannot panic.
            let before = query[..start].chars().next_back();
            let after = query[start + matched.len()..].chars().next();

            let glued_to_previous = matches!(before, Some(c) if c.is_alphanumeric() || c == '.');
            let is_decimal = matches!(after, Some(c) if c.is_ascii_digit());
            !glued_to_previous && !is_decimal
        })
    }

    let mut found = 0usize;
    for number in 1..=MAX_MARKER {
        let marker = format!("{number}.");
        if has_standalone_marker(query, &marker) {
            found += 1;
            if found >= min_items {
                return true;
            }
        } else if found > 0 {
            // The run of consecutive markers has ended.
            break;
        }
    }
    false
}
/// Reports whether `query` asks for a detailed or multi-part answer.
///
/// `query` is expected to be lower-cased already. It qualifies when it
/// contains two or more question marks, or any of the depth / drafting /
/// planning phrases listed below (English first, then German).
fn is_long(query: &str) -> bool {
    const DETAIL_PHRASES: &[&str] = &[
        "and also",
        "in detail",
        "in depth",
        "step by step",
        "analyze",
        "analyse",
        "compare and contrast",
        "pros and cons",
        "advantages and disadvantages",
        "write a",
        "draft a",
        "compose",
        "create a plan",
        "design a",
        "und auch",
        "und außerdem",
        "im detail",
        "ausführlich",
        "schritt für schritt",
        "analysiere",
        "vergleiche",
        "vor- und nachteile",
        "vorteile und nachteile",
        "schreibe ein",
        "schreib ein",
        "erstelle ein",
        "entwurf",
        "entwirf",
        "erstelle einen plan",
        "erstelle eine",
        "erstelle einen",
        "gestalte ein",
        "plane ein",
        "integriere",
        "bitte analysiere",
        "mit meilensteinen",
        "mit kalendereinträgen",
        "projektplan",
    ];

    // Two or more questions in one message count as a multi-part request.
    let multi_question = query.matches('?').count() >= 2;

    multi_question || DETAIL_PHRASES.iter().any(|phrase| query.contains(*phrase))
}
/// Reports whether `query` can be answered with a word or a single fact.
///
/// `query` is expected to be lower-cased and trimmed already. Only queries
/// ending in `'?'` qualify, and then only when they either
/// * start with a yes/no verb (English or German), or
/// * contain one of the single-fact phrases ("how many", "wie viele", ...).
///
/// The English starter `"was "` is also the German word for "what". German
/// "was ...?" questions that the short and medium rule sets recognise
/// ("was ist", "was sind", "was war", "was bedeutet", "was passiert") are
/// therefore not treated as yes/no questions; otherwise those German rules
/// could never apply to a question. Single-fact phrases such as
/// "was ist die hauptstadt" still match through the second check.
fn is_very_short(query: &str) -> bool {
    const YES_NO_STARTERS: &[&str] = &[
        "is ", "are ", "was ", "were ", "do ", "does ", "did ", "can ", "could ", "will ",
        "would ", "should ", "has ", "have ", "had ", "ist ", "sind ", "war ", "kann ", "könnte ",
        "wird ", "würde ", "sollte ", "hat ", "haben ", "hatte ",
    ];
    // German "what ..." openers that collide with the English starter "was ".
    const GERMAN_WHAT_OPENERS: &[&str] = &[
        "was ist ",
        "was sind ",
        "was war ",
        "was bedeutet ",
        "was passiert",
    ];
    const SINGLE_FACT_PHRASES: &[&str] = &[
        "what time",
        "what day",
        "what date",
        "how old",
        "how many",
        "how much",
        "what year",
        "what color",
        "what colour",
        "how tall",
        "how far",
        "how long is",
        "how long does",
        "what is the capital",
        "what is the population",
        "who won",
        "true or false",
        "wie spät",
        "welcher tag",
        "welches datum",
        "wie alt",
        "wie viele",
        "wie viel",
        "welches jahr",
        "welche farbe",
        "wie groß",
        "wie weit",
        "wie lang ist",
        "wie lange dauert",
        "was ist die hauptstadt",
        "wer hat gewonnen",
        "richtig oder falsch",
        "stimmt es",
    ];

    if !query.ends_with('?') {
        return false;
    }

    let german_what_question = GERMAN_WHAT_OPENERS
        .iter()
        .any(|opener| query.starts_with(*opener));
    let yes_no_question = !german_what_question
        && YES_NO_STARTERS
            .iter()
            .any(|starter| query.starts_with(*starter));

    yes_no_question
        || SINGLE_FACT_PHRASES
            .iter()
            .any(|phrase| query.contains(*phrase))
}
/// Reports whether `query` is a lookup-style request: who/what/when/where
/// questions, definitions, translations, conversions and calculations.
///
/// `query` is expected to be lower-cased already; a match anywhere in the
/// text is enough. English phrases come first, then their German equivalents.
fn is_short(query: &str) -> bool {
    const LOOKUP_PHRASES: &[&str] = &[
        "who is",
        "who was",
        "what is",
        "what are",
        "what was",
        "define ",
        "definition of",
        "meaning of",
        "when did",
        "when was",
        "when is",
        "where is",
        "where was",
        "where are",
        "translate",
        "convert ",
        "calculate ",
        "what does",
        "wer ist",
        "wer war",
        "was ist",
        "was sind",
        "was war",
        "definiere ",
        "definition von",
        "bedeutung von",
        "wann hat",
        "wann war",
        "wann ist",
        "wo ist",
        "wo war",
        "wo sind",
        "übersetze",
        "umrechnen",
        "berechne ",
        "was bedeutet",
    ];

    for phrase in LOOKUP_PHRASES {
        if query.contains(*phrase) {
            return true;
        }
    }
    false
}
/// Reports whether `query` asks for an explanation, description, summary or
/// short enumeration.
///
/// `query` is expected to be lower-cased already; a match anywhere in the
/// text is enough. English cues come first, then their German equivalents.
fn is_medium(query: &str) -> bool {
    const EXPLANATION_CUES: &[&str] = &[
        "explain",
        "how does",
        "how do",
        "how can",
        "how to",
        "why ",
        "describe",
        "summarize",
        "summarise",
        "list ",
        "name ",
        "what happens",
        "tell me about",
        "give me",
        "overview",
        "difference between",
        "erkläre",
        "erklär ",
        "wie funktioniert",
        "wie geht",
        "wie kann",
        "warum ",
        "wieso ",
        "weshalb ",
        "beschreibe",
        "beschreib ",
        "zusammenfassung",
        "fasse zusammen",
        "fass zusammen",
        "nenne ",
        "nenn ",
        "was passiert",
        "erzähl mir",
        "gib mir",
        "überblick",
        "unterschied zwischen",
        "zeig mir",
        "sag mir",
    ];

    EXPLANATION_CUES
        .iter()
        .copied()
        .any(|cue| query.contains(cue))
}
// Unit tests for `classify_query` and `QueryComplexity::max_tokens`.
// Inputs are written in natural casing; `classify_query` lower-cases and
// trims them before matching.
#[cfg(test)]
mod tests {
use super::*;
// --- English: yes/no and single-fact questions -> VeryShort ---
#[test]
fn yes_no_question_is_very_short() {
assert_eq!(
classify_query("Is the sky blue?"),
QueryComplexity::VeryShort
);
}
#[test]
fn can_question_is_very_short() {
assert_eq!(classify_query("Can dogs swim?"), QueryComplexity::VeryShort);
}
#[test]
fn what_time_is_very_short() {
assert_eq!(
classify_query("What time is it in Tokyo?"),
QueryComplexity::VeryShort
);
}
#[test]
fn how_many_is_very_short() {
assert_eq!(
classify_query("How many planets are in the solar system?"),
QueryComplexity::VeryShort
);
}
// "what is the capital" is a single-fact phrase; the very-short check runs
// before the short check, so the generic "what is" pattern never gets a say.
#[test]
fn capital_question_is_very_short() {
assert_eq!(
classify_query("What is the capital of France?"),
QueryComplexity::VeryShort
);
}
// --- English: lookup-style requests -> Short ---
#[test]
fn who_is_query_is_short() {
assert_eq!(
classify_query("Who is Albert Einstein?"),
QueryComplexity::Short
);
}
#[test]
fn definition_query_is_short() {
assert_eq!(
classify_query("Define photosynthesis"),
QueryComplexity::Short
);
}
#[test]
fn when_did_is_short() {
assert_eq!(
classify_query("When did World War 2 end?"),
QueryComplexity::Short
);
}
#[test]
fn translate_is_short() {
assert_eq!(
classify_query("Translate 'hello' to German"),
QueryComplexity::Short
);
}
#[test]
fn where_is_query_is_short() {
assert_eq!(
classify_query("Where is the Eiffel Tower?"),
QueryComplexity::Short
);
}
// --- English: explanations and summaries -> Medium ---
#[test]
fn explain_query_is_medium() {
assert_eq!(
classify_query("Explain how gravity works"),
QueryComplexity::Medium
);
}
#[test]
fn how_does_query_is_medium() {
assert_eq!(
classify_query("How does a combustion engine work?"),
QueryComplexity::Medium
);
}
#[test]
fn list_query_is_medium() {
assert_eq!(
classify_query("List the top 5 programming languages"),
QueryComplexity::Medium
);
}
#[test]
fn why_query_is_medium() {
assert_eq!(
classify_query("Why is the ocean salty?"),
QueryComplexity::Medium
);
}
#[test]
fn summarize_query_is_medium() {
assert_eq!(
classify_query("Summarize the plot of Hamlet"),
QueryComplexity::Medium
);
}
// --- English: detailed or multi-part requests -> Long ---
// Two question marks reach the `is_long` threshold but not the three needed
// for Unlimited.
#[test]
fn multi_question_is_long() {
assert_eq!(
classify_query("What is Rust? Why is it popular?"),
QueryComplexity::Long
);
}
#[test]
fn in_detail_is_long() {
assert_eq!(
classify_query("Explain quantum physics in detail"),
QueryComplexity::Long
);
}
#[test]
fn step_by_step_is_long() {
assert_eq!(
classify_query("Show me step by step how to bake bread"),
QueryComplexity::Long
);
}
#[test]
fn analyze_is_long() {
assert_eq!(
classify_query("Analyze the themes in Macbeth"),
QueryComplexity::Long
);
}
// Also contains the short pattern "what are"; the long check runs first.
#[test]
fn pros_and_cons_is_long() {
assert_eq!(
classify_query("What are the pros and cons of electric cars?"),
QueryComplexity::Long
);
}
// --- English: code and long-form writing -> Unlimited ---
#[test]
fn write_code_is_unlimited() {
assert_eq!(
classify_query("Write code to sort a list in Python"),
QueryComplexity::Unlimited
);
}
#[test]
fn implement_is_unlimited() {
assert_eq!(
classify_query("Implement a binary search tree"),
QueryComplexity::Unlimited
);
}
#[test]
fn write_essay_is_unlimited() {
assert_eq!(
classify_query("Write an essay about climate change"),
QueryComplexity::Unlimited
);
}
#[test]
fn create_function_is_unlimited() {
assert_eq!(
classify_query("Create a function that validates emails"),
QueryComplexity::Unlimited
);
}
#[test]
fn refactor_is_unlimited() {
assert_eq!(
classify_query("Refactor this code to use async/await"),
QueryComplexity::Unlimited
);
}
// --- Token budgets returned by `max_tokens` ---
#[test]
fn max_tokens_very_short() {
assert_eq!(QueryComplexity::VeryShort.max_tokens(2048), 64);
}
#[test]
fn max_tokens_short() {
assert_eq!(QueryComplexity::Short.max_tokens(2048), 256);
}
#[test]
fn max_tokens_medium() {
assert_eq!(QueryComplexity::Medium.max_tokens(2048), 512);
}
#[test]
fn max_tokens_long() {
assert_eq!(QueryComplexity::Long.max_tokens(2048), 1024);
}
#[test]
fn max_tokens_unlimited_uses_model_max() {
assert_eq!(QueryComplexity::Unlimited.max_tokens(2048), 2048);
assert_eq!(QueryComplexity::Unlimited.max_tokens(4096), 4096);
}
// --- Fallbacks and normalisation ---
#[test]
fn ambiguous_query_defaults_to_medium() {
assert_eq!(classify_query("Tell me something"), QueryComplexity::Medium);
}
#[test]
fn empty_query_defaults_to_medium() {
assert_eq!(classify_query(""), QueryComplexity::Medium);
}
#[test]
fn case_insensitive_classification() {
assert_eq!(
classify_query("EXPLAIN how DNS works"),
QueryComplexity::Medium
);
assert_eq!(
classify_query("WRITE CODE for a web server"),
QueryComplexity::Unlimited
);
}
// "a]" repeated 101 times is 202 bytes and matches no phrase, so only the
// length fallback (more than 200 bytes) can produce Long here.
#[test]
fn long_unmatched_query_defaults_to_long() {
let long_query = "a]".repeat(101);
assert!(long_query.len() > 200);
assert_eq!(classify_query(&long_query), QueryComplexity::Long);
}
#[test]
fn short_unmatched_query_defaults_to_medium() {
assert_eq!(classify_query("Hallo Welt"), QueryComplexity::Medium);
}
// --- German: VeryShort ---
#[test]
fn german_yes_no_is_very_short() {
assert_eq!(
classify_query("Ist der Himmel blau?"),
QueryComplexity::VeryShort,
);
}
#[test]
fn german_how_many_is_very_short() {
assert_eq!(
classify_query("Wie viele Planeten gibt es?"),
QueryComplexity::VeryShort,
);
}
// --- German: Short ---
#[test]
fn german_who_is_query_is_short() {
assert_eq!(
classify_query("Wer ist Albert Einstein?"),
QueryComplexity::Short,
);
}
#[test]
fn german_definition_is_short() {
assert_eq!(
classify_query("Definiere Photosynthese"),
QueryComplexity::Short,
);
}
// NOTE(review): despite the name, this input exercises the "wann war"
// ("when was") pattern, not a "was ist" ("what is") question.
#[test]
fn german_what_is_query_is_short() {
assert_eq!(
classify_query("Wann war die Berliner Mauer gefallen?"),
QueryComplexity::Short,
);
}
// --- German: Medium ---
#[test]
fn german_explain_is_medium() {
assert_eq!(
classify_query("Erkläre wie Gravitation funktioniert"),
QueryComplexity::Medium,
);
}
#[test]
fn german_how_does_is_medium() {
assert_eq!(
classify_query("Wie funktioniert ein Verbrennungsmotor?"),
QueryComplexity::Medium,
);
}
#[test]
fn german_why_is_medium() {
assert_eq!(
classify_query("Warum ist der Ozean salzig?"),
QueryComplexity::Medium,
);
}
// --- German: Long ---
#[test]
fn german_create_plan_is_long() {
assert_eq!(
classify_query("Erstelle einen Plan für das Projekt"),
QueryComplexity::Long,
);
}
#[test]
fn german_in_detail_is_long() {
assert_eq!(
classify_query("Erkläre Quantenphysik ausführlich"),
QueryComplexity::Long,
);
}
#[test]
fn german_step_by_step_is_long() {
assert_eq!(
classify_query("Zeig mir Schritt für Schritt wie man Brot backt"),
QueryComplexity::Long,
);
}
#[test]
fn german_analyze_is_long() {
assert_eq!(
classify_query("Analysiere die Themen in Macbeth"),
QueryComplexity::Long,
);
}
#[test]
fn german_project_plan_is_long() {
assert_eq!(
classify_query("Erstelle einen Projektplan für SmartInventory mit Meilensteinen"),
QueryComplexity::Long,
);
}
// --- German: Unlimited ---
#[test]
fn german_write_code_is_unlimited() {
assert_eq!(
classify_query("Schreibe Code zum Sortieren einer Liste"),
QueryComplexity::Unlimited,
);
}
#[test]
fn german_implement_is_unlimited() {
assert_eq!(
classify_query("Implementiere einen binären Suchbaum"),
QueryComplexity::Unlimited,
);
}
// "erstelle einen bericht" is an unlimited long-form phrase; the unlimited
// check runs before `is_long`, whose "erstelle ein" would also match.
#[test]
fn german_write_report_is_unlimited() {
assert_eq!(
classify_query("Erstelle einen Bericht über den Klimawandel"),
QueryComplexity::Unlimited,
);
}
#[test]
fn german_recherchiere_is_unlimited() {
assert_eq!(
classify_query("Recherchiere im Web und dokumentiere die Ergebnisse"),
QueryComplexity::Unlimited,
);
}
#[test]
fn german_dokumentiere_is_unlimited() {
assert_eq!(
classify_query("Dokumentiere die Architektur des Systems"),
QueryComplexity::Unlimited,
);
}
// --- Structural signals: question-mark count and numbered lists ---
#[test]
fn three_question_marks_is_unlimited() {
assert_eq!(
classify_query(
"Sind alle Anforderungen vollständig? \
Gibt es technische Risiken? \
Welche Standards müssen unterstützt werden?"
),
QueryComplexity::Unlimited,
);
}
// NOTE(review): this input also contains three '?', so the question-mark
// rule in `is_unlimited` already yields Unlimited before the numbered-list
// check runs; the list detector is not exercised in isolation here.
#[test]
fn numbered_list_with_three_items_is_unlimited() {
assert_eq!(
classify_query(
"Prüfe folgendes: \
1. Sind die Anforderungen vollständig? \
2. Gibt es Risiken? \
3. Welche Standards gelten?"
),
QueryComplexity::Unlimited,
);
}
// Realistic multi-part prompt: it has four '?', four numbered items and the
// phrases "recherchiere" / "dokumentiere", each of which is sufficient alone.
#[test]
fn research_analysis_prompt_is_unlimited() {
assert_eq!(
classify_query(
"Analysiere den Wissensgraphen für SmartInventory und prüfe: \
1. Sind alle funktionalen Anforderungen vollständig? \
2. Gibt es technische Risiken beim gewählten Stack? \
3. Welche Barcode-/QR-Standards müssen unterstützt werden? \
4. Welche Lieferanten-API-Standards (EDI, REST) sind Branchenstandard? \
Recherchiere im Web und dokumentiere die Ergebnisse."
),
QueryComplexity::Unlimited,
);
}
// NOTE(review): same input and expectation as `multi_question_is_long`.
#[test]
fn two_question_marks_remains_long() {
assert_eq!(
classify_query("What is Rust? Why is it popular?"),
QueryComplexity::Long
);
}
// Two list items and two '?' stay below both Unlimited thresholds; the
// assertion only rules out Unlimited and does not pin the exact class.
#[test]
fn two_numbered_items_remains_long_or_less() {
let c = classify_query("Prüfe: 1. Ist alles da? 2. Fehlt etwas?");
assert_ne!(c, QueryComplexity::Unlimited);
}
#[test]
fn research_and_document_english_is_unlimited() {
assert_eq!(
classify_query("Research and document the API standards in the industry"),
QueryComplexity::Unlimited,
);
}
}