/// Canonicalizes `s` so two texts can be compared without regard to case, punctuation or
/// spacing.
///
/// Characters for which [`char::is_alphanumeric`] is true are kept and lowercased one by one
/// with [`char::to_lowercase`]. Every maximal run of any other characters (punctuation,
/// whitespace, symbols) that sits between two kept words becomes a single ASCII space.
/// Separator runs at the start or end are dropped, so the result never begins or ends with a
/// space and is empty when `s` contains no alphanumeric character at all.
fn normalize(s: &str) -> String {
    // Lowercasing can change the byte length of a character, so this is only a starting size.
    let mut out = String::with_capacity(s.len());

    // Splitting on every non-alphanumeric char yields empty pieces for consecutive, leading and
    // trailing separators; discarding them leaves exactly the words.
    let words = s
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty());

    for word in words {
        if !out.is_empty() {
            out.push(' ');
        }
        // Lowercase per character rather than with `str::to_lowercase`: the latter is
        // context-sensitive (Greek final sigma), which would make a word's normal form depend
        // on its position in the text.
        out.extend(word.chars().flat_map(char::to_lowercase));
    }
    out
}

38pub fn is_prompt_echo(response: &str, prompt: &str) -> bool {
55 let resp = normalize(response);
56 let prompt_n = normalize(prompt);
57
58 if resp.chars().count() < 8 || prompt_n.is_empty() {
59 return false;
60 }
61
62 if prompt_n.contains(&resp) {
63 return true;
64 }
65
66 let resp_words: Vec<&str> = resp.split_whitespace().collect();
67 let prompt_words: Vec<&str> = prompt_n.split_whitespace().collect();
68 if resp_words.len() < 6 {
69 return false;
70 }
71 let max_run = longest_common_word_run(&resp_words, &prompt_words);
72 max_run >= 6 && max_run.saturating_mul(10) >= resp_words.len().saturating_mul(7)
73}
74
/// Returns the length, in words, of the longest sequence of consecutive elements that occurs
/// in both `a` and `b`.
///
/// Elements are compared for exact equality. The result is `0` when either slice is empty or
/// when the slices share no element.
fn longest_common_word_run(a: &[&str], b: &[&str]) -> usize {
    if a.is_empty() || b.is_empty() {
        return 0;
    }

    let mut best = 0usize;
    // While processing one word of `a`, `run[j + 1]` is the length of the common run that ends
    // at that word and at `b[j]`. `run[0]` is never written and stays 0: it stands for "nothing
    // precedes `b[0]`".
    let mut run = vec![0usize; b.len() + 1];

    for word in a {
        // Fill right to left: `run[j]` is read before this pass overwrites it, so it still
        // holds the value computed for the previous word of `a`. That lets one buffer serve as
        // both the previous and the current row.
        for j in (0..b.len()).rev() {
            run[j + 1] = if *word == b[j] { run[j] + 1 } else { 0 };
            best = best.max(run[j + 1]);
        }
    }
    best
}

/// Unit tests for `is_prompt_echo` and `longest_common_word_run`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Prompt text used by most tests as the reference the response is compared against.
    const SAMPLE_PROMPT: &str = "John Doe speaking. Professional, culinary register: \
        French pastry, sourdough baking, fermentation science, restaurant kitchen workflows. \
        Speech is in English or French; transcribe in the spoken language.";

    /// Shorthand for checking `response` against [`SAMPLE_PROMPT`].
    fn echoes_sample(response: &str) -> bool {
        is_prompt_echo(response, SAMPLE_PROMPT)
    }

    /// With an empty prompt there is nothing to echo.
    #[test]
    fn empty_prompt_never_echoes() {
        assert!(!is_prompt_echo("hello world this is a test", ""));
    }

    /// An empty response is below the minimum length.
    #[test]
    fn empty_response_not_echo() {
        assert!(!echoes_sample(""));
    }

    /// Responses that normalize to fewer than 8 characters are not flagged, even when the
    /// prompt contains them.
    #[test]
    fn short_response_not_echo() {
        for short in ["John.", "pastry"] {
            assert!(!echoes_sample(short), "{short:?} must not be flagged");
        }
    }

    /// The prompt compared against itself is an echo.
    #[test]
    fn full_prompt_echo_detected() {
        assert!(echoes_sample(SAMPLE_PROMPT));
    }

    /// A leading portion of the prompt is an echo.
    #[test]
    fn prefix_chunk_echo_detected() {
        let prefix = "John Doe speaking. Professional, culinary register: \
            French pastry, sourdough baking";
        assert!(echoes_sample(prefix));
    }

    /// Differences in case and punctuation do not hide an echo.
    #[test]
    fn punctuation_and_case_insensitive() {
        let shouted = "JOHN DOE SPEAKING — professional / culinary register";
        assert!(echoes_sample(shouted));
    }

    /// A long shared run still counts when a few unrelated words surround it
    /// (10 of the 13 words here).
    #[test]
    fn partial_echo_with_extra_words_detected() {
        let padded = "okay um John Doe speaking professional culinary register French \
            pastry sourdough baking right";
        assert!(echoes_sample(padded));
    }

    /// Speech sharing no words with the prompt is not flagged.
    #[test]
    fn real_speech_not_flagged() {
        let speech = "let's rebase this branch onto master and push it up to my fork";
        assert!(!echoes_sample(speech));
    }

    /// Speech that merely mentions a few prompt terms is not flagged.
    #[test]
    fn real_speech_with_isolated_prompt_terms_not_flagged() {
        let speech = "I am working on the sourdough recipe for a French pastry tonight";
        assert!(!echoes_sample(speech));
    }

    /// "the quick brown" is the longest shared run.
    #[test]
    fn longest_run_basic() {
        let left = ["the", "quick", "brown", "fox"];
        let right = ["jumps", "over", "the", "quick", "brown", "dog"];
        assert_eq!(longest_common_word_run(&left, &right), 3);
    }

    /// Disjoint inputs share no run.
    #[test]
    fn longest_run_no_overlap() {
        let left = ["alpha", "beta"];
        let right = ["gamma", "delta"];
        assert_eq!(longest_common_word_run(&left, &right), 0);
    }

    /// An empty slice on either side yields a run length of zero.
    #[test]
    fn longest_run_empty_inputs() {
        let empty: [&str; 0] = [];
        let words = ["alpha", "beta"];
        assert_eq!(longest_common_word_run(&empty, &words), 0);
        assert_eq!(longest_common_word_run(&words, &empty), 0);
    }
}