Skip to main content

lang_check/
config.rs

1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5
6#[derive(Debug, Serialize, Deserialize, Clone)]
7pub struct Config {
8    #[serde(default)]
9    pub engines: EngineConfig,
10    #[serde(default)]
11    pub rules: HashMap<String, RuleConfig>,
12    #[serde(default = "default_exclude")]
13    pub exclude: Vec<String>,
14    #[serde(default)]
15    pub auto_fix: Vec<AutoFixRule>,
16    #[serde(default)]
17    pub performance: PerformanceConfig,
18    #[serde(default)]
19    pub dictionaries: DictionaryConfig,
20    #[serde(default)]
21    pub languages: LanguageConfig,
22    #[serde(default)]
23    pub workspace: WorkspaceConfig,
24}
25
26/// Language extension aliasing configuration.
27///
28/// Maps canonical language IDs to additional file extensions.
29/// Built-in extensions (e.g. `.md` → markdown, `.htm` → html) are always
30/// included; entries here add to them.
31///
32/// ```yaml
33/// languages:
34///   extensions:
35///     markdown: [mdx, Rmd]
36///     latex: [sty]
37/// ```
38#[derive(Debug, Serialize, Deserialize, Clone, Default)]
39pub struct LanguageConfig {
40    /// Additional file extensions per language ID (without leading dots).
41    #[serde(default)]
42    pub extensions: HashMap<String, Vec<String>>,
43    /// LaTeX-specific settings.
44    #[serde(default)]
45    pub latex: LaTeXConfig,
46}
47
48/// LaTeX-specific configuration.
49///
50/// ```yaml
51/// languages:
52///   latex:
53///     skip_environments:
54///       - prooftree
55///       - mycustomenv
56/// ```
57#[derive(Debug, Serialize, Deserialize, Clone, Default)]
58pub struct LaTeXConfig {
59    /// Extra environment names to skip during prose extraction.
60    /// These are checked in addition to the built-in skip list.
61    #[serde(default)]
62    pub skip_environments: Vec<String>,
63    /// Extra command names whose arguments should be skipped during prose
64    /// extraction. These are checked in addition to the built-in skip list
65    /// (which includes `texttt`, `verb`, `url`, etc.).
66    #[serde(default)]
67    pub skip_commands: Vec<String>,
68}
69
70/// Workspace-level settings.
71///
72/// ```yaml
73/// workspace:
74///   index_on_open: true
75/// ```
76#[derive(Debug, Serialize, Deserialize, Clone, Default)]
77pub struct WorkspaceConfig {
78    /// Whether to run a full workspace index when the project is opened.
79    /// Default: false (only check documents on open/change).
80    #[serde(default)]
81    pub index_on_open: bool,
82    /// Custom path for the workspace database file. When empty (default),
83    /// databases are stored in the user data directory.
84    #[serde(default)]
85    pub db_path: Option<String>,
86}
87
88/// Performance tuning options. High Performance Mode (HPM) disables
89/// expensive engines and external providers, using only harper-core.
90#[derive(Debug, Serialize, Deserialize, Clone)]
91pub struct PerformanceConfig {
92    /// Enable High Performance Mode (only harper, no LT/externals).
93    #[serde(default)]
94    pub high_performance_mode: bool,
95    /// Debounce delay in milliseconds for LSP on-type checking.
96    #[serde(default = "default_debounce_ms")]
97    pub debounce_ms: u64,
98    /// Maximum file size in bytes to check (0 = unlimited).
99    #[serde(default)]
100    pub max_file_size: usize,
101}
102
103impl Default for PerformanceConfig {
104    fn default() -> Self {
105        Self {
106            high_performance_mode: false,
107            debounce_ms: 300,
108            max_file_size: 0,
109        }
110    }
111}
112
/// Serde default for `PerformanceConfig::debounce_ms`, in milliseconds.
const fn default_debounce_ms() -> u64 {
    const DEBOUNCE_MS: u64 = 300;
    DEBOUNCE_MS
}
116
117/// Configuration for bundled and additional wordlist dictionaries.
118#[derive(Debug, Serialize, Deserialize, Clone)]
119pub struct DictionaryConfig {
120    /// Whether to load the bundled domain-specific dictionaries (software terms,
121    /// TypeScript, companies, jargon). Default: true.
122    #[serde(default = "default_true")]
123    pub bundled: bool,
124    /// Paths to additional wordlist files (one word per line, `#` comments).
125    /// Relative paths are resolved from the workspace root.
126    #[serde(default)]
127    pub paths: Vec<String>,
128}
129
130impl Default for DictionaryConfig {
131    fn default() -> Self {
132        Self {
133            bundled: true,
134            paths: Vec::new(),
135        }
136    }
137}
138
139/// A user-defined find->replace auto-fix rule.
140#[derive(Debug, Serialize, Deserialize, Clone)]
141pub struct AutoFixRule {
142    /// Pattern to find (plain text, case-sensitive).
143    pub find: String,
144    /// Replacement text.
145    pub replace: String,
146    /// Optional context filter: only apply when surrounding text matches.
147    #[serde(default)]
148    pub context: Option<String>,
149    /// Optional description for the rule.
150    #[serde(default)]
151    pub description: Option<String>,
152}
153
154#[derive(Debug, Serialize, Deserialize, Clone)]
155pub struct EngineConfig {
156    #[serde(default = "default_true")]
157    pub harper: bool,
158    #[serde(default)]
159    pub languagetool: bool,
160    #[serde(default = "default_lt_url")]
161    pub languagetool_url: String,
162    /// Which engine handles English checking: `"harper"` or `"languagetool"`.
163    #[serde(default = "default_english_engine")]
164    pub english_engine: String,
165    /// External checker providers registered via config.
166    #[serde(default)]
167    pub external: Vec<ExternalProvider>,
168    /// WASM checker plugins loaded via Extism.
169    #[serde(default)]
170    pub wasm_plugins: Vec<WasmPlugin>,
171    /// BCP-47 natural language tag for spell/grammar checking (e.g. "en-US", "de-DE").
172    #[serde(default = "default_spell_language")]
173    pub spell_language: String,
174}
175
176/// An external checker binary that communicates via stdin/stdout JSON.
177///
178/// The binary receives `{"text": "...", "language_id": "..."}` on stdin
179/// and returns `[{"start_byte": N, "end_byte": N, "message": "...", ...}]` on stdout.
180#[derive(Debug, Serialize, Deserialize, Clone)]
181pub struct ExternalProvider {
182    /// Display name for this provider.
183    pub name: String,
184    /// Path to the executable.
185    pub command: String,
186    /// Optional arguments to pass to the command.
187    #[serde(default)]
188    pub args: Vec<String>,
189    /// Optional file extensions this provider supports (empty = all).
190    #[serde(default)]
191    pub extensions: Vec<String>,
192}
193
194/// A WASM plugin loaded via Extism.
195///
196/// Plugins must export a `check` function that receives a JSON string
197/// `{"text": "...", "language_id": "..."}` and returns a JSON array of diagnostics.
198#[derive(Debug, Serialize, Deserialize, Clone)]
199pub struct WasmPlugin {
200    /// Display name for this plugin.
201    pub name: String,
202    /// Path to the `.wasm` file (relative to workspace root or absolute).
203    pub path: String,
204    /// Optional file extensions this plugin supports (empty = all).
205    #[serde(default)]
206    pub extensions: Vec<String>,
207}
208
209impl Default for EngineConfig {
210    fn default() -> Self {
211        Self {
212            harper: true,
213            languagetool: false,
214            languagetool_url: "http://localhost:8010".to_string(),
215            english_engine: "harper".to_string(),
216            external: Vec::new(),
217            wasm_plugins: Vec::new(),
218            spell_language: default_spell_language(),
219        }
220    }
221}
222
223#[derive(Debug, Serialize, Deserialize, Clone)]
224pub struct RuleConfig {
225    pub severity: Option<String>, // "error", "warning", "info", "hint", "off"
226}
227
/// Serde default for boolean options that are enabled unless configured
/// otherwise.
const fn default_true() -> bool {
    true
}
/// Serde default for `EngineConfig::languagetool_url`.
fn default_lt_url() -> String {
    String::from("http://localhost:8010")
}
/// Serde default for `EngineConfig::english_engine`.
fn default_english_engine() -> String {
    String::from("harper")
}
/// Serde default for `EngineConfig::spell_language`.
fn default_spell_language() -> String {
    String::from("en-US")
}
/// Serde default for `Config::exclude`: dependency, build-output and cache
/// directories, minified/bundled assets, and package-manager lock files.
fn default_exclude() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        "node_modules/**",
        ".git/**",
        "target/**",
        "dist/**",
        "build/**",
        ".next/**",
        ".nuxt/**",
        "vendor/**",
        "__pycache__/**",
        ".venv/**",
        "venv/**",
        ".tox/**",
        ".mypy_cache/**",
        "*.min.js",
        "*.min.css",
        "*.bundle.js",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
    ];

    PATTERNS.iter().map(|pattern| String::from(*pattern)).collect()
}
263
264impl Config {
265    pub fn load(workspace_root: &Path) -> Result<Self> {
266        // Prefer YAML, fall back to JSON for backward compatibility
267        let yaml_path = workspace_root.join(".languagecheck.yaml");
268        let yml_path = workspace_root.join(".languagecheck.yml");
269        let json_path = workspace_root.join(".languagecheck.json");
270
271        if yaml_path.exists() {
272            let content = std::fs::read_to_string(yaml_path)?;
273            let config: Self = serde_yaml::from_str(&content)?;
274            Ok(config)
275        } else if yml_path.exists() {
276            let content = std::fs::read_to_string(yml_path)?;
277            let config: Self = serde_yaml::from_str(&content)?;
278            Ok(config)
279        } else if json_path.exists() {
280            let content = std::fs::read_to_string(json_path)?;
281            let config: Self = serde_json::from_str(&content)?;
282            Ok(config)
283        } else {
284            Ok(Self::default())
285        }
286    }
287
288    /// Apply user-defined auto-fix rules to the given text, returning the modified text
289    /// and the number of replacements made.
290    #[must_use]
291    pub fn apply_auto_fixes(&self, text: &str) -> (String, usize) {
292        let mut result = text.to_string();
293        let mut total = 0;
294
295        for rule in &self.auto_fix {
296            if let Some(ctx) = &rule.context
297                && !result.contains(ctx.as_str())
298            {
299                continue;
300            }
301            let count = result.matches(&rule.find).count();
302            if count > 0 {
303                result = result.replace(&rule.find, &rule.replace);
304                total += count;
305            }
306        }
307
308        (result, total)
309    }
310}
311
312impl Default for Config {
313    fn default() -> Self {
314        Self {
315            engines: EngineConfig::default(),
316            rules: HashMap::new(),
317            exclude: default_exclude(),
318            auto_fix: Vec::new(),
319            performance: PerformanceConfig::default(),
320            dictionaries: DictionaryConfig::default(),
321            languages: LanguageConfig::default(),
322            workspace: WorkspaceConfig::default(),
323        }
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Scratch workspace directory for the file-based `Config::load` tests.
    ///
    /// The directory name includes the process id so that concurrent test
    /// runs do not share a directory, and the directory is removed on drop,
    /// so it is cleaned up even when an assertion panics.
    struct TempWorkspace {
        root: PathBuf,
    }

    impl TempWorkspace {
        /// Creates a fresh, empty directory under the system temp dir.
        fn new(name: &str) -> Self {
            let root = std::env::temp_dir().join(format!("{name}_{}", std::process::id()));
            let _ = std::fs::remove_dir_all(&root);
            std::fs::create_dir_all(&root).unwrap();
            Self { root }
        }

        /// Writes `contents` to `file_name` inside the workspace.
        fn write(&self, file_name: &str, contents: &str) {
            std::fs::write(self.root.join(file_name), contents).unwrap();
        }

        /// Root directory to hand to `Config::load`.
        fn path(&self) -> &Path {
            &self.root
        }
    }

    impl Drop for TempWorkspace {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover directory must not fail a test.
            let _ = std::fs::remove_dir_all(&self.root);
        }
    }

    #[test]
    fn default_config_has_harper_enabled_lt_disabled() {
        let config = Config::default();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
    }

    #[test]
    fn default_config_has_standard_excludes() {
        let config = Config::default();
        assert!(config.exclude.contains(&"node_modules/**".to_string()));
        assert!(config.exclude.contains(&".git/**".to_string()));
        assert!(config.exclude.contains(&"target/**".to_string()));
        assert!(config.exclude.contains(&"dist/**".to_string()));
        assert!(config.exclude.contains(&"vendor/**".to_string()));
    }

    #[test]
    fn default_lt_url() {
        let config = Config::default();
        assert_eq!(config.engines.languagetool_url, "http://localhost:8010");
    }

    #[test]
    fn default_english_engine_is_harper() {
        let config = Config::default();
        assert_eq!(config.engines.english_engine, "harper");
    }

    #[test]
    fn english_engine_from_yaml() {
        let yaml = r#"
engines:
  english_engine: languagetool
  languagetool: true
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.english_engine, "languagetool");
    }

    #[test]
    fn load_from_json_string() {
        let json = r#"{
            "engines": { "harper": true, "languagetool": false },
            "rules": { "spelling.typo": { "severity": "warning" } }
        }"#;
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.contains_key("spelling.typo"));
        assert_eq!(
            config.rules["spelling.typo"].severity.as_deref(),
            Some("warning")
        );
    }

    #[test]
    fn load_partial_json_uses_defaults() {
        let json = "{}";
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.is_empty());
    }

    #[test]
    fn load_from_json_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_json");
        workspace.write(
            ".languagecheck.json",
            r#"{"engines": {"harper": false, "languagetool": true}}"#,
        );

        let config = Config::load(workspace.path()).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn load_from_yaml_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_yaml");
        workspace.write(
            ".languagecheck.yaml",
            "engines:\n  harper: false\n  languagetool: true\n",
        );

        let config = Config::load(workspace.path()).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn yaml_takes_precedence_over_json() {
        let workspace = TempWorkspace::new("lang_check_test_config_precedence");

        // Write both files with different values
        workspace.write(".languagecheck.yaml", "engines:\n  harper: false\n");
        workspace.write(".languagecheck.json", r#"{"engines": {"harper": true}}"#);

        let config = Config::load(workspace.path()).unwrap();
        // YAML should win
        assert!(!config.engines.harper);
    }

    #[test]
    fn load_missing_file_returns_default() {
        let workspace = TempWorkspace::new("lang_check_test_config_missing");

        let config = Config::load(workspace.path()).unwrap();
        assert!(config.engines.harper);
    }

    #[test]
    fn auto_fix_simple_replacement() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "teh".to_string(),
                replace: "the".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Fix teh typo in teh text.");
        assert_eq!(result, "Fix the typo in the text.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_with_context_filter() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "colour".to_string(),
                replace: "color".to_string(),
                context: Some("American".to_string()),
                description: Some("Use American spelling".to_string()),
            }],
            ..Config::default()
        };
        // Context matches — replacement should happen
        let (result, count) = config.apply_auto_fixes("American English: the colour is red.");
        assert_eq!(result, "American English: the color is red.");
        assert_eq!(count, 1);

        // Context does not match — no replacement
        let (result, count) = config.apply_auto_fixes("British English: the colour is red.");
        assert_eq!(result, "British English: the colour is red.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_no_match() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "foo".to_string(),
                replace: "bar".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("No matches here.");
        assert_eq!(result, "No matches here.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_multiple_rules() {
        let config = Config {
            auto_fix: vec![
                AutoFixRule {
                    find: "recieve".to_string(),
                    replace: "receive".to_string(),
                    context: None,
                    description: None,
                },
                AutoFixRule {
                    find: "seperate".to_string(),
                    replace: "separate".to_string(),
                    context: None,
                    description: None,
                },
            ],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Please recieve the seperate package.");
        assert_eq!(result, "Please receive the separate package.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_loads_from_yaml() {
        let yaml = r#"
auto_fix:
  - find: "teh"
    replace: "the"
    description: "Fix common typo"
  - find: "colour"
    replace: "color"
    context: "American"
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.auto_fix.len(), 2);
        assert_eq!(config.auto_fix[0].find, "teh");
        assert_eq!(config.auto_fix[0].replace, "the");
        assert_eq!(
            config.auto_fix[0].description.as_deref(),
            Some("Fix common typo")
        );
        assert_eq!(config.auto_fix[1].context.as_deref(), Some("American"));
    }

    #[test]
    fn default_config_has_empty_auto_fix() {
        let config = Config::default();
        assert!(config.auto_fix.is_empty());
    }

    #[test]
    fn external_providers_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  languagetool: false
  external:
    - name: vale
      command: /usr/bin/vale
      args: ["--output", "JSON"]
      extensions: [md, rst]
    - name: custom-checker
      command: ./my-checker
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.external.len(), 2);
        assert_eq!(config.engines.external[0].name, "vale");
        assert_eq!(config.engines.external[0].command, "/usr/bin/vale");
        assert_eq!(config.engines.external[0].args, vec!["--output", "JSON"]);
        assert_eq!(config.engines.external[0].extensions, vec!["md", "rst"]);
        assert_eq!(config.engines.external[1].name, "custom-checker");
        assert!(config.engines.external[1].args.is_empty());
    }

    #[test]
    fn default_config_has_no_external_providers() {
        let config = Config::default();
        assert!(config.engines.external.is_empty());
    }

    #[test]
    fn wasm_plugins_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  wasm_plugins:
    - name: custom-checker
      path: .languagecheck/plugins/checker.wasm
      extensions: [md, html]
    - name: style-linter
      path: /opt/plugins/style.wasm
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.wasm_plugins.len(), 2);
        assert_eq!(config.engines.wasm_plugins[0].name, "custom-checker");
        assert_eq!(
            config.engines.wasm_plugins[0].path,
            ".languagecheck/plugins/checker.wasm"
        );
        assert_eq!(
            config.engines.wasm_plugins[0].extensions,
            vec!["md", "html"]
        );
        assert_eq!(config.engines.wasm_plugins[1].name, "style-linter");
        assert!(config.engines.wasm_plugins[1].extensions.is_empty());
    }

    #[test]
    fn default_config_has_no_wasm_plugins() {
        let config = Config::default();
        assert!(config.engines.wasm_plugins.is_empty());
    }

    #[test]
    fn performance_config_defaults() {
        let config = Config::default();
        assert!(!config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 300);
        assert_eq!(config.performance.max_file_size, 0);
    }

    #[test]
    fn performance_config_from_yaml() {
        let yaml = r#"
performance:
  high_performance_mode: true
  debounce_ms: 500
  max_file_size: 1048576
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert!(config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 500);
        assert_eq!(config.performance.max_file_size, 1_048_576);
    }

    #[test]
    fn latex_skip_environments_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_environments:
      - prooftree
      - mycustomenv
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_environments,
            vec!["prooftree", "mycustomenv"]
        );
    }

    #[test]
    fn default_config_has_empty_latex_skip_environments() {
        let config = Config::default();
        assert!(config.languages.latex.skip_environments.is_empty());
    }

    #[test]
    fn latex_skip_commands_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_commands:
      - codefont
      - myverb
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_commands,
            vec!["codefont", "myverb"]
        );
    }

    #[test]
    fn default_spell_language_is_en_us() {
        let config = Config::default();
        assert_eq!(config.engines.spell_language, "en-US");
    }

    #[test]
    fn spell_language_from_yaml() {
        let yaml = r#"
engines:
  spell_language: de-DE
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.spell_language, "de-DE");
    }

    #[test]
    fn default_config_has_empty_latex_skip_commands() {
        let config = Config::default();
        assert!(config.languages.latex.skip_commands.is_empty());
    }
}