Skip to main content

lang_check/
config.rs

1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5
6#[derive(Debug, Serialize, Deserialize, Clone)]
7pub struct Config {
8    #[serde(default)]
9    pub engines: EngineConfig,
10    #[serde(default)]
11    pub rules: HashMap<String, RuleConfig>,
12    #[serde(default = "default_exclude")]
13    pub exclude: Vec<String>,
14    #[serde(default)]
15    pub auto_fix: Vec<AutoFixRule>,
16    #[serde(default)]
17    pub performance: PerformanceConfig,
18    #[serde(default)]
19    pub dictionaries: DictionaryConfig,
20    #[serde(default)]
21    pub languages: LanguageConfig,
22    #[serde(default)]
23    pub workspace: WorkspaceConfig,
24}
25
26/// Language extension aliasing configuration.
27///
28/// Maps canonical language IDs to additional file extensions.
29/// Built-in extensions (e.g. `.md` → markdown, `.htm` → html) are always
30/// included; entries here add to them.
31///
32/// ```yaml
33/// languages:
34///   extensions:
35///     markdown: [mdx, Rmd]
36///     latex: [sty]
37/// ```
38#[derive(Debug, Serialize, Deserialize, Clone, Default)]
39pub struct LanguageConfig {
40    /// Additional file extensions per language ID (without leading dots).
41    #[serde(default)]
42    pub extensions: HashMap<String, Vec<String>>,
43    /// LaTeX-specific settings.
44    #[serde(default)]
45    pub latex: LaTeXConfig,
46}
47
48/// LaTeX-specific configuration.
49///
50/// ```yaml
51/// languages:
52///   latex:
53///     skip_environments:
54///       - prooftree
55///       - mycustomenv
56/// ```
57#[derive(Debug, Serialize, Deserialize, Clone, Default)]
58pub struct LaTeXConfig {
59    /// Extra environment names to skip during prose extraction.
60    /// These are checked in addition to the built-in skip list.
61    #[serde(default)]
62    pub skip_environments: Vec<String>,
63    /// Extra command names whose arguments should be skipped during prose
64    /// extraction. These are checked in addition to the built-in skip list
65    /// (which includes `texttt`, `verb`, `url`, etc.).
66    #[serde(default)]
67    pub skip_commands: Vec<String>,
68}
69
70/// Workspace-level settings.
71///
72/// ```yaml
73/// workspace:
74///   index_on_open: true
75/// ```
76#[derive(Debug, Serialize, Deserialize, Clone, Default)]
77pub struct WorkspaceConfig {
78    /// Whether to run a full workspace index when the project is opened.
79    /// Default: false (only check documents on open/change).
80    #[serde(default)]
81    pub index_on_open: bool,
82}
83
84/// Performance tuning options. High Performance Mode (HPM) disables
85/// expensive engines and external providers, using only harper-core.
86#[derive(Debug, Serialize, Deserialize, Clone)]
87pub struct PerformanceConfig {
88    /// Enable High Performance Mode (only harper, no LT/externals).
89    #[serde(default)]
90    pub high_performance_mode: bool,
91    /// Debounce delay in milliseconds for LSP on-type checking.
92    #[serde(default = "default_debounce_ms")]
93    pub debounce_ms: u64,
94    /// Maximum file size in bytes to check (0 = unlimited).
95    #[serde(default)]
96    pub max_file_size: usize,
97}
98
99impl Default for PerformanceConfig {
100    fn default() -> Self {
101        Self {
102            high_performance_mode: false,
103            debounce_ms: 300,
104            max_file_size: 0,
105        }
106    }
107}
108
/// Serde default for `PerformanceConfig::debounce_ms`, in milliseconds.
const fn default_debounce_ms() -> u64 {
    const DEBOUNCE_MS: u64 = 300;
    DEBOUNCE_MS
}
112
113/// Configuration for bundled and additional wordlist dictionaries.
114#[derive(Debug, Serialize, Deserialize, Clone)]
115pub struct DictionaryConfig {
116    /// Whether to load the bundled domain-specific dictionaries (software terms,
117    /// TypeScript, companies, jargon). Default: true.
118    #[serde(default = "default_true")]
119    pub bundled: bool,
120    /// Paths to additional wordlist files (one word per line, `#` comments).
121    /// Relative paths are resolved from the workspace root.
122    #[serde(default)]
123    pub paths: Vec<String>,
124}
125
126impl Default for DictionaryConfig {
127    fn default() -> Self {
128        Self {
129            bundled: true,
130            paths: Vec::new(),
131        }
132    }
133}
134
135/// A user-defined find->replace auto-fix rule.
136#[derive(Debug, Serialize, Deserialize, Clone)]
137pub struct AutoFixRule {
138    /// Pattern to find (plain text, case-sensitive).
139    pub find: String,
140    /// Replacement text.
141    pub replace: String,
142    /// Optional context filter: only apply when surrounding text matches.
143    #[serde(default)]
144    pub context: Option<String>,
145    /// Optional description for the rule.
146    #[serde(default)]
147    pub description: Option<String>,
148}
149
150#[derive(Debug, Serialize, Deserialize, Clone)]
151pub struct EngineConfig {
152    #[serde(default = "default_true")]
153    pub harper: bool,
154    #[serde(default)]
155    pub languagetool: bool,
156    #[serde(default = "default_lt_url")]
157    pub languagetool_url: String,
158    /// Which engine handles English checking: `"harper"` or `"languagetool"`.
159    #[serde(default = "default_english_engine")]
160    pub english_engine: String,
161    /// External checker providers registered via config.
162    #[serde(default)]
163    pub external: Vec<ExternalProvider>,
164    /// WASM checker plugins loaded via Extism.
165    #[serde(default)]
166    pub wasm_plugins: Vec<WasmPlugin>,
167}
168
169/// An external checker binary that communicates via stdin/stdout JSON.
170///
171/// The binary receives `{"text": "...", "language_id": "..."}` on stdin
172/// and returns `[{"start_byte": N, "end_byte": N, "message": "...", ...}]` on stdout.
173#[derive(Debug, Serialize, Deserialize, Clone)]
174pub struct ExternalProvider {
175    /// Display name for this provider.
176    pub name: String,
177    /// Path to the executable.
178    pub command: String,
179    /// Optional arguments to pass to the command.
180    #[serde(default)]
181    pub args: Vec<String>,
182    /// Optional file extensions this provider supports (empty = all).
183    #[serde(default)]
184    pub extensions: Vec<String>,
185}
186
187/// A WASM plugin loaded via Extism.
188///
189/// Plugins must export a `check` function that receives a JSON string
190/// `{"text": "...", "language_id": "..."}` and returns a JSON array of diagnostics.
191#[derive(Debug, Serialize, Deserialize, Clone)]
192pub struct WasmPlugin {
193    /// Display name for this plugin.
194    pub name: String,
195    /// Path to the `.wasm` file (relative to workspace root or absolute).
196    pub path: String,
197    /// Optional file extensions this plugin supports (empty = all).
198    #[serde(default)]
199    pub extensions: Vec<String>,
200}
201
202impl Default for EngineConfig {
203    fn default() -> Self {
204        Self {
205            harper: true,
206            languagetool: false,
207            languagetool_url: "http://localhost:8010".to_string(),
208            english_engine: "harper".to_string(),
209            external: Vec::new(),
210            wasm_plugins: Vec::new(),
211        }
212    }
213}
214
215#[derive(Debug, Serialize, Deserialize, Clone)]
216pub struct RuleConfig {
217    pub severity: Option<String>, // "error", "warning", "info", "hint", "off"
218}
219
/// Serde default helper for boolean options that are on unless disabled.
const fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `EngineConfig::languagetool_url`.
fn default_lt_url() -> String {
    String::from("http://localhost:8010")
}
/// Serde default for `EngineConfig::english_engine`.
fn default_english_engine() -> String {
    String::from("harper")
}
/// Serde default for `Config::exclude`: dependency, VCS, build-output and
/// cache directories, followed by minified/bundled assets and lock files.
fn default_exclude() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        "node_modules/**",
        ".git/**",
        "target/**",
        "dist/**",
        "build/**",
        ".next/**",
        ".nuxt/**",
        "vendor/**",
        "__pycache__/**",
        ".venv/**",
        "venv/**",
        ".tox/**",
        ".mypy_cache/**",
        "*.min.js",
        "*.min.css",
        "*.bundle.js",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
    ];

    PATTERNS.iter().map(|pattern| (*pattern).to_owned()).collect()
}
252
253impl Config {
254    pub fn load(workspace_root: &Path) -> Result<Self> {
255        // Prefer YAML, fall back to JSON for backward compatibility
256        let yaml_path = workspace_root.join(".languagecheck.yaml");
257        let yml_path = workspace_root.join(".languagecheck.yml");
258        let json_path = workspace_root.join(".languagecheck.json");
259
260        if yaml_path.exists() {
261            let content = std::fs::read_to_string(yaml_path)?;
262            let config: Self = serde_yaml::from_str(&content)?;
263            Ok(config)
264        } else if yml_path.exists() {
265            let content = std::fs::read_to_string(yml_path)?;
266            let config: Self = serde_yaml::from_str(&content)?;
267            Ok(config)
268        } else if json_path.exists() {
269            let content = std::fs::read_to_string(json_path)?;
270            let config: Self = serde_json::from_str(&content)?;
271            Ok(config)
272        } else {
273            Ok(Self::default())
274        }
275    }
276
277    /// Apply user-defined auto-fix rules to the given text, returning the modified text
278    /// and the number of replacements made.
279    #[must_use]
280    pub fn apply_auto_fixes(&self, text: &str) -> (String, usize) {
281        let mut result = text.to_string();
282        let mut total = 0;
283
284        for rule in &self.auto_fix {
285            if let Some(ctx) = &rule.context
286                && !result.contains(ctx.as_str())
287            {
288                continue;
289            }
290            let count = result.matches(&rule.find).count();
291            if count > 0 {
292                result = result.replace(&rule.find, &rule.replace);
293                total += count;
294            }
295        }
296
297        (result, total)
298    }
299}
300
301impl Default for Config {
302    fn default() -> Self {
303        Self {
304            engines: EngineConfig::default(),
305            rules: HashMap::new(),
306            exclude: default_exclude(),
307            auto_fix: Vec::new(),
308            performance: PerformanceConfig::default(),
309            dictionaries: DictionaryConfig::default(),
310            languages: LanguageConfig::default(),
311            workspace: WorkspaceConfig::default(),
312        }
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch workspace directory for the file-based `Config::load` tests.
    ///
    /// The directory is removed again when the value is dropped, so a failing
    /// assertion cannot leave stale config files behind for the next run.
    struct TempWorkspace {
        root: std::path::PathBuf,
    }

    impl TempWorkspace {
        /// Create an empty directory under the system temp dir. The process
        /// id is part of the name so concurrent test runs do not share it.
        fn new(label: &str) -> Self {
            let root = std::env::temp_dir().join(format!("{label}_{}", std::process::id()));
            let _ = std::fs::remove_dir_all(&root);
            std::fs::create_dir_all(&root).unwrap();
            Self { root }
        }

        /// Write `content` to `file_name` inside the workspace.
        fn write(&self, file_name: &str, content: &str) {
            std::fs::write(self.root.join(file_name), content).unwrap();
        }
    }

    impl Drop for TempWorkspace {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover directory must not fail a test.
            let _ = std::fs::remove_dir_all(&self.root);
        }
    }

    #[test]
    fn default_config_has_harper_enabled_lt_disabled() {
        let config = Config::default();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
    }

    #[test]
    fn default_config_has_standard_excludes() {
        let config = Config::default();
        assert!(config.exclude.contains(&"node_modules/**".to_string()));
        assert!(config.exclude.contains(&".git/**".to_string()));
        assert!(config.exclude.contains(&"target/**".to_string()));
        assert!(config.exclude.contains(&"dist/**".to_string()));
        assert!(config.exclude.contains(&"vendor/**".to_string()));
    }

    #[test]
    fn default_lt_url() {
        let config = Config::default();
        assert_eq!(config.engines.languagetool_url, "http://localhost:8010");
    }

    #[test]
    fn default_english_engine_is_harper() {
        let config = Config::default();
        assert_eq!(config.engines.english_engine, "harper");
    }

    #[test]
    fn english_engine_from_yaml() {
        let yaml = r#"
engines:
  english_engine: languagetool
  languagetool: true
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.english_engine, "languagetool");
    }

    #[test]
    fn load_from_json_string() {
        let json = r#"{
            "engines": { "harper": true, "languagetool": false },
            "rules": { "spelling.typo": { "severity": "warning" } }
        }"#;
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.contains_key("spelling.typo"));
        assert_eq!(
            config.rules["spelling.typo"].severity.as_deref(),
            Some("warning")
        );
    }

    #[test]
    fn load_partial_json_uses_defaults() {
        let json = r#"{}"#;
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.is_empty());
    }

    #[test]
    fn load_from_json_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_json");
        workspace.write(
            ".languagecheck.json",
            r#"{"engines": {"harper": false, "languagetool": true}}"#,
        );

        let config = Config::load(&workspace.root).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn load_from_yaml_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_yaml");
        workspace.write(
            ".languagecheck.yaml",
            "engines:\n  harper: false\n  languagetool: true\n",
        );

        let config = Config::load(&workspace.root).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn load_from_yml_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_yml");
        workspace.write(
            ".languagecheck.yml",
            "engines:\n  harper: false\n  languagetool: true\n",
        );

        let config = Config::load(&workspace.root).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn yaml_takes_precedence_over_json() {
        let workspace = TempWorkspace::new("lang_check_test_config_precedence");

        // Write both files with different values
        workspace.write(".languagecheck.yaml", "engines:\n  harper: false\n");
        workspace.write(".languagecheck.json", r#"{"engines": {"harper": true}}"#);

        let config = Config::load(&workspace.root).unwrap();
        // YAML should win
        assert!(!config.engines.harper);
    }

    #[test]
    fn load_missing_file_returns_default() {
        let workspace = TempWorkspace::new("lang_check_test_config_missing");

        let config = Config::load(&workspace.root).unwrap();
        assert!(config.engines.harper);
    }

    #[test]
    fn load_malformed_json_is_an_error() {
        let workspace = TempWorkspace::new("lang_check_test_config_malformed");
        workspace.write(".languagecheck.json", "{ this is not json");

        assert!(Config::load(&workspace.root).is_err());
    }

    #[test]
    fn auto_fix_simple_replacement() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "teh".to_string(),
                replace: "the".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Fix teh typo in teh text.");
        assert_eq!(result, "Fix the typo in the text.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_with_context_filter() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "colour".to_string(),
                replace: "color".to_string(),
                context: Some("American".to_string()),
                description: Some("Use American spelling".to_string()),
            }],
            ..Config::default()
        };
        // Context matches — replacement should happen
        let (result, count) = config.apply_auto_fixes("American English: the colour is red.");
        assert_eq!(result, "American English: the color is red.");
        assert_eq!(count, 1);

        // Context does not match — no replacement
        let (result, count) = config.apply_auto_fixes("British English: the colour is red.");
        assert_eq!(result, "British English: the colour is red.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_no_match() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "foo".to_string(),
                replace: "bar".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("No matches here.");
        assert_eq!(result, "No matches here.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_multiple_rules() {
        let config = Config {
            auto_fix: vec![
                AutoFixRule {
                    find: "recieve".to_string(),
                    replace: "receive".to_string(),
                    context: None,
                    description: None,
                },
                AutoFixRule {
                    find: "seperate".to_string(),
                    replace: "separate".to_string(),
                    context: None,
                    description: None,
                },
            ],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Please recieve the seperate package.");
        assert_eq!(result, "Please receive the separate package.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_loads_from_yaml() {
        let yaml = r#"
auto_fix:
  - find: "teh"
    replace: "the"
    description: "Fix common typo"
  - find: "colour"
    replace: "color"
    context: "American"
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.auto_fix.len(), 2);
        assert_eq!(config.auto_fix[0].find, "teh");
        assert_eq!(config.auto_fix[0].replace, "the");
        assert_eq!(
            config.auto_fix[0].description.as_deref(),
            Some("Fix common typo")
        );
        assert_eq!(config.auto_fix[1].context.as_deref(), Some("American"));
    }

    #[test]
    fn default_config_has_empty_auto_fix() {
        let config = Config::default();
        assert!(config.auto_fix.is_empty());
    }

    #[test]
    fn external_providers_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  languagetool: false
  external:
    - name: vale
      command: /usr/bin/vale
      args: ["--output", "JSON"]
      extensions: [md, rst]
    - name: custom-checker
      command: ./my-checker
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.external.len(), 2);
        assert_eq!(config.engines.external[0].name, "vale");
        assert_eq!(config.engines.external[0].command, "/usr/bin/vale");
        assert_eq!(config.engines.external[0].args, vec!["--output", "JSON"]);
        assert_eq!(config.engines.external[0].extensions, vec!["md", "rst"]);
        assert_eq!(config.engines.external[1].name, "custom-checker");
        assert!(config.engines.external[1].args.is_empty());
    }

    #[test]
    fn default_config_has_no_external_providers() {
        let config = Config::default();
        assert!(config.engines.external.is_empty());
    }

    #[test]
    fn wasm_plugins_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  wasm_plugins:
    - name: custom-checker
      path: .languagecheck/plugins/checker.wasm
      extensions: [md, html]
    - name: style-linter
      path: /opt/plugins/style.wasm
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.wasm_plugins.len(), 2);
        assert_eq!(config.engines.wasm_plugins[0].name, "custom-checker");
        assert_eq!(
            config.engines.wasm_plugins[0].path,
            ".languagecheck/plugins/checker.wasm"
        );
        assert_eq!(
            config.engines.wasm_plugins[0].extensions,
            vec!["md", "html"]
        );
        assert_eq!(config.engines.wasm_plugins[1].name, "style-linter");
        assert!(config.engines.wasm_plugins[1].extensions.is_empty());
    }

    #[test]
    fn default_config_has_no_wasm_plugins() {
        let config = Config::default();
        assert!(config.engines.wasm_plugins.is_empty());
    }

    #[test]
    fn performance_config_defaults() {
        let config = Config::default();
        assert!(!config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 300);
        assert_eq!(config.performance.max_file_size, 0);
    }

    #[test]
    fn performance_config_from_yaml() {
        let yaml = r#"
performance:
  high_performance_mode: true
  debounce_ms: 500
  max_file_size: 1048576
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert!(config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 500);
        assert_eq!(config.performance.max_file_size, 1_048_576);
    }

    #[test]
    fn latex_skip_environments_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_environments:
      - prooftree
      - mycustomenv
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_environments,
            vec!["prooftree", "mycustomenv"]
        );
    }

    #[test]
    fn default_config_has_empty_latex_skip_environments() {
        let config = Config::default();
        assert!(config.languages.latex.skip_environments.is_empty());
    }

    #[test]
    fn latex_skip_commands_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_commands:
      - codefont
      - myverb
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_commands,
            vec!["codefont", "myverb"]
        );
    }

    #[test]
    fn default_config_has_empty_latex_skip_commands() {
        let config = Config::default();
        assert!(config.languages.latex.skip_commands.is_empty());
    }
}