Skip to main content

murmur_core/context/
title_analyzer.rs

1//! Auto-detect programming language and generate prompt context from window titles.
2//!
3//! Editors typically include the filename in the window title, e.g.:
4//! - `"auth.rs — Visual Studio Code"`
5//! - `"main.py - PyCharm"`
6//! - `"App.tsx - WebStorm"`
7//! - `"vim ~/.config/fish/config.fish"`
8//!
9//! This module extracts the filename, maps its extension to a programming language,
10//! and generates a descriptive prompt prefix for Whisper biasing — all without
11//! any user configuration.
12
13use super::provider::DictationMode;
14
15/// Result of analyzing a window title.
16#[derive(Debug, Clone, PartialEq)]
17pub struct TitleContext {
18    /// Detected programming language (e.g. "Rust", "Python")
19    pub language: Option<String>,
20    /// File extension that was matched (e.g. "rs", "py")
21    pub extension: Option<String>,
22    /// Extracted filename from the title (e.g. "auth.rs")
23    pub filename: Option<String>,
24    /// Auto-generated prompt prefix for Whisper (e.g. "Rust programming.")
25    pub prompt_prefix: Option<String>,
26    /// Suggested dictation mode based on detected context
27    pub suggested_mode: Option<DictationMode>,
28}
29
/// Known file extensions and the language metadata associated with each.
///
/// Each entry is `(extension, language name, prompt prefix)`:
/// - extension: lowercase, without the leading dot (e.g. `"rs"`)
/// - language name: human-readable name (e.g. `"Rust"`)
/// - prompt prefix: phrase used for the Whisper prompt; `analyze_title`
///   appends a trailing `.` to it
///
/// The dictation mode is not stored in this table; `analyze_title` derives it
/// from the extension via `is_doc_extension`. `lookup_extension` returns the
/// first entry whose extension matches, so each extension should be listed once.
const EXTENSION_MAP: &[(&str, &str, &str)] = &[
    // Systems
    ("rs", "Rust", "Rust programming"),
    ("go", "Go", "Go programming"),
    ("c", "C", "C programming"),
    ("h", "C", "C programming"),
    ("cpp", "C++", "C++ programming"),
    ("cc", "C++", "C++ programming"),
    ("cxx", "C++", "C++ programming"),
    ("hpp", "C++", "C++ programming"),
    ("zig", "Zig", "Zig programming"),
    // JVM
    ("java", "Java", "Java programming"),
    ("kt", "Kotlin", "Kotlin programming"),
    ("kts", "Kotlin", "Kotlin programming"),
    ("scala", "Scala", "Scala programming"),
    ("groovy", "Groovy", "Groovy programming"),
    // .NET
    ("cs", "C#", "C# programming"),
    ("fs", "F#", "F# programming"),
    ("vb", "Visual Basic", "Visual Basic programming"),
    // Web / JS
    ("js", "JavaScript", "JavaScript programming"),
    ("jsx", "JavaScript React", "JavaScript React programming"),
    ("ts", "TypeScript", "TypeScript programming"),
    ("tsx", "TypeScript React", "TypeScript React programming"),
    ("mjs", "JavaScript", "JavaScript programming"),
    ("cjs", "JavaScript", "JavaScript programming"),
    // Python
    ("py", "Python", "Python programming"),
    ("pyi", "Python", "Python programming"),
    ("pyx", "Cython", "Cython programming"),
    // Ruby
    ("rb", "Ruby", "Ruby programming"),
    ("erb", "Ruby", "Ruby template"),
    // PHP
    ("php", "PHP", "PHP programming"),
    // Swift / Obj-C
    ("swift", "Swift", "Swift programming"),
    ("m", "Objective-C", "Objective-C programming"),
    ("mm", "Objective-C++", "Objective-C++ programming"),
    // Shell
    ("sh", "Shell", "Shell scripting"),
    ("bash", "Bash", "Bash scripting"),
    ("zsh", "Zsh", "Zsh scripting"),
    ("fish", "Fish", "Fish shell scripting"),
    ("ps1", "PowerShell", "PowerShell scripting"),
    // Config / Data
    ("json", "JSON", "JSON configuration"),
    ("yaml", "YAML", "YAML configuration"),
    ("yml", "YAML", "YAML configuration"),
    ("toml", "TOML", "TOML configuration"),
    ("xml", "XML", "XML markup"),
    ("ini", "INI", "INI configuration"),
    // Markup / Docs
    ("md", "Markdown", "Markdown documentation"),
    ("mdx", "MDX", "MDX documentation"),
    ("rst", "reStructuredText", "reStructuredText documentation"),
    ("tex", "LaTeX", "LaTeX document"),
    ("html", "HTML", "HTML markup"),
    ("htm", "HTML", "HTML markup"),
    ("css", "CSS", "CSS styling"),
    ("scss", "SCSS", "SCSS styling"),
    ("sass", "Sass", "Sass styling"),
    ("less", "Less", "Less styling"),
    // Functional
    ("hs", "Haskell", "Haskell programming"),
    ("ml", "OCaml", "OCaml programming"),
    ("mli", "OCaml", "OCaml programming"),
    ("ex", "Elixir", "Elixir programming"),
    ("exs", "Elixir", "Elixir programming"),
    ("erl", "Erlang", "Erlang programming"),
    ("clj", "Clojure", "Clojure programming"),
    ("lisp", "Lisp", "Lisp programming"),
    ("el", "Emacs Lisp", "Emacs Lisp programming"),
    // Data / Query
    ("sql", "SQL", "SQL database queries"),
    ("graphql", "GraphQL", "GraphQL queries"),
    ("gql", "GraphQL", "GraphQL queries"),
    ("proto", "Protocol Buffers", "Protocol Buffers definition"),
    // DevOps / Infra
    ("tf", "Terraform", "Terraform infrastructure"),
    ("hcl", "HCL", "HCL configuration"),
    // Matches names like "app.dockerfile"; a bare "Dockerfile" is
    // special-cased in `lookup_extension` before this table is consulted.
    ("dockerfile", "Dockerfile", "Docker configuration"),
    ("nix", "Nix", "Nix configuration"),
    // Misc
    ("r", "R", "R programming"),
    ("jl", "Julia", "Julia programming"),
    ("lua", "Lua", "Lua programming"),
    ("dart", "Dart", "Dart programming"),
    ("v", "V", "V programming"),
    ("nim", "Nim", "Nim programming"),
    ("cr", "Crystal", "Crystal programming"),
];
125
/// Terminal app bundle IDs — suggest command mode for these.
///
/// `is_terminal_app` compares against this list with exact, case-sensitive
/// string equality, so each entry must match the identifier reported by the
/// platform character-for-character.
const TERMINAL_APP_IDS: &[&str] = &[
    "com.googlecode.iterm2",
    "com.apple.Terminal",
    "org.alacritty",
    // NOTE(review): Warp may report a different identifier
    // (possibly "dev.warp.Warp-Stable") — confirm against an installed copy.
    "io.warp.warpterm",
    "net.kovidgoyal.kitty",
    "com.github.wez.wezterm",
];
135
136/// Analyze a window title to extract language and context information.
137pub fn analyze_title(title: &str) -> TitleContext {
138    let filename = extract_filename(title);
139
140    if let Some(ref name) = filename {
141        if let Some((ext, lang, prefix)) = lookup_extension(name) {
142            let mode = if is_doc_extension(ext) {
143                Some(DictationMode::Prose)
144            } else {
145                Some(DictationMode::Code)
146            };
147            return TitleContext {
148                language: Some(lang.to_string()),
149                extension: Some(ext.to_string()),
150                filename: Some(name.clone()),
151                prompt_prefix: Some(format!("{prefix}.")),
152                suggested_mode: mode,
153            };
154        }
155    }
156
157    TitleContext {
158        language: None,
159        extension: None,
160        filename,
161        prompt_prefix: None,
162        suggested_mode: None,
163    }
164}
165
166/// Check if an app is a terminal emulator.
167pub fn is_terminal_app(app_id: &str) -> bool {
168    TERMINAL_APP_IDS.contains(&app_id)
169}
170
171/// Extract a filename from a window title.
172///
173/// Handles common editor title formats:
174/// - `"filename.ext — App Name"` (VS Code, em dash)
175/// - `"filename.ext - App Name"` (JetBrains, hyphen)
176/// - `"filename.ext — Edited — App Name"` (multiple separators)
177/// - `"~/path/to/filename.ext"` (terminals)
178/// - `"App Name — filename.ext"` (some editors put filename last)
179fn extract_filename(title: &str) -> Option<String> {
180    let title = title.trim();
181    if title.is_empty() {
182        return None;
183    }
184
185    // Split on common title separators (em dash, en dash, hyphen with spaces)
186    let segments: Vec<&str> = title
187        .split(&['\u{2014}', '\u{2013}'][..]) // em dash, en dash
188        .flat_map(|s| s.split(" - "))
189        .map(|s| s.trim())
190        .filter(|s| !s.is_empty())
191        .collect();
192
193    // Check each segment for something that looks like a filename
194    for segment in &segments {
195        if let Some(name) = try_extract_filename_from_segment(segment) {
196            return Some(name);
197        }
198    }
199
200    // If no segment matched, try the whole title (e.g. terminal showing a path)
201    try_extract_filename_from_segment(title)
202}
203
204/// Try to find a filename with a known extension in a title segment.
205fn try_extract_filename_from_segment(segment: &str) -> Option<String> {
206    // Handle paths: take the last path component (supports / and \)
207    let candidate = segment
208        .rsplit(&['/', '\\'][..])
209        .next()
210        .unwrap_or(segment)
211        .trim();
212
213    // Strip common prefixes/suffixes editors add
214    let candidate = candidate
215        .trim_start_matches("● ") // VS Code modified indicator
216        .trim_start_matches("◉ ")
217        .trim_start_matches("* ")
218        .trim_end_matches(" [Modified]")
219        .trim_end_matches(" [+]")
220        .trim_end_matches(" •")
221        .trim();
222
223    // Must contain a dot for an extension
224    if candidate.rfind('.').is_some() {
225        // Only accept files with known programming/config extensions
226        // to avoid false matches like "report.pdf" or "Mr. Smith"
227        if lookup_extension(candidate).is_some() {
228            return Some(candidate.to_string());
229        }
230    }
231
232    // Handle extension-less known filenames
233    let lower = candidate.to_lowercase();
234    if matches!(
235        lower.as_str(),
236        "dockerfile" | "makefile" | "justfile" | "rakefile" | "gemfile" | "cmakelists.txt"
237    ) {
238        return Some(candidate.to_string());
239    }
240
241    None
242}
243
244/// Look up a filename's extension in the known language map.
245fn lookup_extension(filename: &str) -> Option<(&'static str, &'static str, &'static str)> {
246    // Handle "Dockerfile" and similar extensionless files
247    let lower = filename.to_lowercase();
248    if lower == "dockerfile" {
249        return Some(("dockerfile", "Dockerfile", "Docker configuration"));
250    }
251    if lower == "makefile" || lower == "justfile" {
252        return Some(("makefile", "Make", "Build system configuration"));
253    }
254
255    let ext = filename.rsplit('.').next()?.to_lowercase();
256    EXTENSION_MAP
257        .iter()
258        .find(|(e, _, _)| *e == ext.as_str())
259        .map(|&(e, l, p)| (e, l, p))
260}
261
/// Tell whether `ext` denotes a documentation/prose format, for which prose
/// dictation mode should be used.
///
/// `ext` is compared as-is (no dot, case-sensitive) against a fixed list.
fn is_doc_extension(ext: &str) -> bool {
    const PROSE_EXTENSIONS: [&str; 5] = ["md", "mdx", "rst", "tex", "txt"];
    PROSE_EXTENSIONS.contains(&ext)
}
266
// Unit tests for the title analyzer. They are grouped by the function under
// test: `analyze_title` (end-to-end), `extract_filename`, `is_terminal_app`,
// `lookup_extension` and `is_doc_extension`.
#[cfg(test)]
mod tests {
    use super::*;

    // -- analyze_title --

    // Em-dash separated title: every field of the context should be filled in.
    #[test]
    fn test_analyze_rust_file() {
        let ctx = analyze_title("auth.rs — Visual Studio Code");
        assert_eq!(ctx.language.as_deref(), Some("Rust"));
        assert_eq!(ctx.extension.as_deref(), Some("rs"));
        assert_eq!(ctx.filename.as_deref(), Some("auth.rs"));
        assert!(ctx.prompt_prefix.unwrap().contains("Rust"));
        assert_eq!(ctx.suggested_mode, Some(DictationMode::Code));
    }

    // Hyphen-with-spaces separated title.
    #[test]
    fn test_analyze_python_file() {
        let ctx = analyze_title("main.py - PyCharm");
        assert_eq!(ctx.language.as_deref(), Some("Python"));
        assert_eq!(ctx.extension.as_deref(), Some("py"));
        assert_eq!(ctx.filename.as_deref(), Some("main.py"));
        assert_eq!(ctx.suggested_mode, Some(DictationMode::Code));
    }

    #[test]
    fn test_analyze_typescript_react() {
        let ctx = analyze_title("App.tsx — WebStorm");
        assert_eq!(ctx.language.as_deref(), Some("TypeScript React"));
        assert_eq!(ctx.extension.as_deref(), Some("tsx"));
    }

    // Documentation extensions switch the suggested mode to prose.
    #[test]
    fn test_analyze_markdown_gets_prose_mode() {
        let ctx = analyze_title("README.md — Visual Studio Code");
        assert_eq!(ctx.language.as_deref(), Some("Markdown"));
        assert_eq!(ctx.suggested_mode, Some(DictationMode::Prose));
    }

    // A title without any filename yields no language, prefix or mode.
    #[test]
    fn test_analyze_no_filename() {
        let ctx = analyze_title("Google Chrome");
        assert!(ctx.language.is_none());
        assert!(ctx.prompt_prefix.is_none());
        assert!(ctx.suggested_mode.is_none());
    }

    #[test]
    fn test_analyze_empty_title() {
        let ctx = analyze_title("");
        assert!(ctx.language.is_none());
        assert!(ctx.filename.is_none());
    }

    // A bare path (as shown by terminals): only the last component is kept.
    #[test]
    fn test_analyze_path_in_title() {
        let ctx = analyze_title("~/src/murmur/src/main.rs");
        assert_eq!(ctx.language.as_deref(), Some("Rust"));
        assert_eq!(ctx.filename.as_deref(), Some("main.rs"));
    }

    // The leading "● " unsaved-changes marker must be stripped from the name.
    #[test]
    fn test_analyze_modified_indicator() {
        let ctx = analyze_title("● config.toml — Visual Studio Code");
        assert_eq!(ctx.language.as_deref(), Some("TOML"));
        assert_eq!(ctx.filename.as_deref(), Some("config.toml"));
    }

    // Extension-less "Dockerfile" is recognized by name.
    #[test]
    fn test_analyze_dockerfile() {
        let ctx = analyze_title("Dockerfile — Visual Studio Code");
        assert_eq!(ctx.language.as_deref(), Some("Dockerfile"));
        assert_eq!(ctx.filename.as_deref(), Some("Dockerfile"));
    }

    // With several separators, the first segment that looks like a file wins.
    #[test]
    fn test_analyze_multiple_separators() {
        let ctx = analyze_title("lib.rs — myproject — Visual Studio Code");
        assert_eq!(ctx.language.as_deref(), Some("Rust"));
        assert_eq!(ctx.filename.as_deref(), Some("lib.rs"));
    }

    #[test]
    fn test_analyze_go_file() {
        let ctx = analyze_title("handler.go — GoLand");
        assert_eq!(ctx.language.as_deref(), Some("Go"));
        assert_eq!(ctx.suggested_mode, Some(DictationMode::Code));
    }

    #[test]
    fn test_analyze_sql_file() {
        let ctx = analyze_title("schema.sql — DataGrip");
        assert_eq!(ctx.language.as_deref(), Some("SQL"));
        assert!(ctx.prompt_prefix.unwrap().contains("SQL"));
    }

    #[test]
    fn test_analyze_shell_script() {
        let ctx = analyze_title("deploy.sh — Terminal");
        assert_eq!(ctx.language.as_deref(), Some("Shell"));
    }

    // -- extract_filename --

    #[test]
    fn test_extract_filename_em_dash() {
        assert_eq!(
            extract_filename("file.rs — App"),
            Some("file.rs".to_string())
        );
    }

    #[test]
    fn test_extract_filename_hyphen() {
        assert_eq!(
            extract_filename("file.py - App"),
            Some("file.py".to_string())
        );
    }

    // Forward-slash paths are reduced to their final component.
    #[test]
    fn test_extract_filename_path() {
        assert_eq!(
            extract_filename("/Users/me/src/main.rs"),
            Some("main.rs".to_string())
        );
    }

    // Backslash-separated paths are handled the same way.
    #[test]
    fn test_extract_filename_windows_path() {
        assert_eq!(
            extract_filename("C:\\Users\\me\\src\\main.rs"),
            Some("main.rs".to_string())
        );
    }

    #[test]
    fn test_extract_filename_none_for_no_extension() {
        assert!(extract_filename("Google Chrome").is_none());
    }

    #[test]
    fn test_extract_filename_ignores_unknown_extensions() {
        // Should not match non-programming extensions
        assert!(extract_filename("report.pdf - Preview").is_none());
        assert!(extract_filename("photo.jpg — Photos").is_none());
        assert!(extract_filename("document.docx - Word").is_none());
    }

    // Exercises `analyze_title` (not `extract_filename`) on a hyphen-separated
    // browser title where no segment names a known file.
    #[test]
    fn test_analyze_non_editor_app() {
        // Browser tabs, email, etc. should not produce false matches
        let ctx = analyze_title("GitHub - Pull Request #18 - Google Chrome");
        assert!(ctx.language.is_none());
        assert!(ctx.suggested_mode.is_none());
    }

    // -- is_terminal_app --

    #[test]
    fn test_is_terminal_app() {
        assert!(is_terminal_app("com.apple.Terminal"));
        assert!(is_terminal_app("com.googlecode.iterm2"));
        assert!(!is_terminal_app("com.microsoft.VSCode"));
    }

    // -- lookup_extension --

    #[test]
    fn test_lookup_known_extensions() {
        assert!(lookup_extension("file.rs").is_some());
        assert!(lookup_extension("file.py").is_some());
        assert!(lookup_extension("file.tsx").is_some());
        assert!(lookup_extension("file.go").is_some());
    }

    // Upper-case filenames must resolve the same as lower-case ones.
    #[test]
    fn test_lookup_case_insensitive() {
        assert!(lookup_extension("FILE.RS").is_some());
        assert!(lookup_extension("Main.PY").is_some());
    }

    #[test]
    fn test_lookup_unknown_extension() {
        assert!(lookup_extension("file.xyz123").is_none());
    }

    // -- is_doc_extension --

    #[test]
    fn test_is_doc_extension() {
        assert!(is_doc_extension("md"));
        assert!(is_doc_extension("rst"));
        assert!(is_doc_extension("tex"));
        assert!(!is_doc_extension("rs"));
        assert!(!is_doc_extension("py"));
    }
}