Skip to main content

lang_check/
config.rs

1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5
/// Top-level configuration, as read by [`Config::load`] from a
/// `.languagecheck.yaml`, `.languagecheck.yml`, or `.languagecheck.json` file.
///
/// Every section is optional: a key missing from the file falls back to the
/// default of the corresponding field.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Config {
    /// Checker engine selection and engine-specific settings.
    #[serde(default)]
    pub engines: EngineConfig,
    /// Per-rule settings, keyed by rule ID (e.g. `spelling.typo`).
    #[serde(default)]
    pub rules: HashMap<String, RuleConfig>,
    /// Exclusion patterns (glob-style entries such as `node_modules/**`).
    /// When the key is absent, the list from `default_exclude` is used.
    #[serde(default = "default_exclude")]
    pub exclude: Vec<String>,
    /// User-defined find/replace rules, applied in order by
    /// [`Config::apply_auto_fixes`].
    #[serde(default)]
    pub auto_fix: Vec<AutoFixRule>,
    /// Performance tuning options.
    #[serde(default)]
    pub performance: PerformanceConfig,
    /// Bundled and additional wordlist dictionaries.
    #[serde(default)]
    pub dictionaries: DictionaryConfig,
    /// Per-language settings (extra file extensions, LaTeX options).
    #[serde(default)]
    pub languages: LanguageConfig,
    /// Workspace-level settings.
    #[serde(default)]
    pub workspace: WorkspaceConfig,
}
25
/// Language extension aliasing configuration.
///
/// Maps canonical language IDs to additional file extensions.
/// Built-in extensions (e.g. `.md` → markdown, `.htm` → html) are always
/// included; entries here add to them.
///
/// Both fields are optional and default to empty when omitted.
///
/// ```yaml
/// languages:
///   extensions:
///     markdown: [mdx, Rmd]
///     latex: [sty]
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct LanguageConfig {
    /// Additional file extensions per language ID (without leading dots).
    /// The map key is the language ID, the value the list of extensions.
    #[serde(default)]
    pub extensions: HashMap<String, Vec<String>>,
    /// LaTeX-specific settings.
    #[serde(default)]
    pub latex: LaTeXConfig,
}
47
/// LaTeX-specific configuration.
///
/// Both lists are optional and default to empty when omitted.
///
/// ```yaml
/// languages:
///   latex:
///     skip_environments:
///       - prooftree
///       - mycustomenv
///     skip_commands:
///       - codefont
///       - myverb
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct LaTeXConfig {
    /// Extra environment names to skip during prose extraction.
    /// These are checked in addition to the built-in skip list.
    #[serde(default)]
    pub skip_environments: Vec<String>,
    /// Extra command names whose arguments should be skipped during prose
    /// extraction. These are checked in addition to the built-in skip list
    /// (which includes `texttt`, `verb`, `url`, etc.).
    #[serde(default)]
    pub skip_commands: Vec<String>,
}
69
/// Workspace-level settings.
///
/// ```yaml
/// workspace:
///   index_on_open: true
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct WorkspaceConfig {
    /// Whether to run a full workspace index when the project is opened.
    /// Default: false (only check documents on open/change).
    #[serde(default)]
    pub index_on_open: bool,
    /// Custom path for the workspace database file. When unset (`None`, the
    /// default), databases are stored in the user data directory.
    #[serde(default)]
    pub db_path: Option<String>,
}
87
/// Performance tuning options. High Performance Mode (HPM) disables
/// expensive engines and external providers, using only harper-core.
///
/// ```yaml
/// performance:
///   high_performance_mode: true
///   debounce_ms: 500
///   max_file_size: 1048576
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PerformanceConfig {
    /// Enable High Performance Mode (only harper, no LT/externals).
    /// Default: false.
    #[serde(default)]
    pub high_performance_mode: bool,
    /// Debounce delay in milliseconds for LSP on-type checking.
    /// Default: the value of `default_debounce_ms` (300).
    #[serde(default = "default_debounce_ms")]
    pub debounce_ms: u64,
    /// Maximum file size in bytes to check (0 = unlimited). Default: 0.
    #[serde(default)]
    pub max_file_size: usize,
}
102
103impl Default for PerformanceConfig {
104    fn default() -> Self {
105        Self {
106            high_performance_mode: false,
107            debounce_ms: 300,
108            max_file_size: 0,
109        }
110    }
111}
112
/// Serde default for `PerformanceConfig::debounce_ms`, in milliseconds.
const fn default_debounce_ms() -> u64 {
    const DEFAULT_DEBOUNCE_MS: u64 = 300;
    DEFAULT_DEBOUNCE_MS
}
116
/// Configuration for bundled and additional wordlist dictionaries.
///
/// Both fields are optional in the config file.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DictionaryConfig {
    /// Whether to load the bundled domain-specific dictionaries (software terms,
    /// TypeScript, companies, jargon). Default: true.
    #[serde(default = "default_true")]
    pub bundled: bool,
    /// Paths to additional wordlist files (one word per line, `#` comments).
    /// Relative paths are resolved from the workspace root.
    /// Default: empty.
    #[serde(default)]
    pub paths: Vec<String>,
}
129
130impl Default for DictionaryConfig {
131    fn default() -> Self {
132        Self {
133            bundled: true,
134            paths: Vec::new(),
135        }
136    }
137}
138
/// A user-defined find->replace auto-fix rule.
///
/// `find` and `replace` are required; `context` and `description` are
/// optional.
///
/// ```yaml
/// auto_fix:
///   - find: "teh"
///     replace: "the"
///     description: "Fix common typo"
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AutoFixRule {
    /// Pattern to find (plain text, case-sensitive).
    pub find: String,
    /// Replacement text.
    pub replace: String,
    /// Optional context filter: when set, `Config::apply_auto_fixes` applies
    /// the rule only if the text being fixed contains this string (a plain
    /// substring check over the whole text).
    #[serde(default)]
    pub context: Option<String>,
    /// Optional description for the rule.
    #[serde(default)]
    pub description: Option<String>,
}
153
/// Checker engine selection and engine-specific settings.
///
/// ```yaml
/// engines:
///   english_engine: languagetool
///   languagetool: true
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct EngineConfig {
    /// Whether the Harper engine is enabled. Default: true.
    #[serde(default = "default_true")]
    pub harper: bool,
    /// Whether the LanguageTool engine is enabled. Default: false.
    #[serde(default)]
    pub languagetool: bool,
    /// URL used for LanguageTool. Default: `http://localhost:8010`.
    #[serde(default = "default_lt_url")]
    pub languagetool_url: String,
    /// Which engine handles English checking: `"harper"` or `"languagetool"`.
    /// Default: `"harper"`.
    #[serde(default = "default_english_engine")]
    pub english_engine: String,
    /// External checker providers registered via config.
    #[serde(default)]
    pub external: Vec<ExternalProvider>,
    /// WASM checker plugins loaded via Extism.
    #[serde(default)]
    pub wasm_plugins: Vec<WasmPlugin>,
}
172
/// An external checker binary that communicates via stdin/stdout JSON.
///
/// The binary receives `{"text": "...", "language_id": "..."}` on stdin
/// and returns `[{"start_byte": N, "end_byte": N, "message": "...", ...}]` on stdout.
///
/// `name` and `command` are required; `args` and `extensions` default to
/// empty lists.
///
/// ```yaml
/// engines:
///   external:
///     - name: vale
///       command: /usr/bin/vale
///       args: ["--output", "JSON"]
///       extensions: [md, rst]
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ExternalProvider {
    /// Display name for this provider.
    pub name: String,
    /// Path to the executable.
    pub command: String,
    /// Optional arguments to pass to the command.
    #[serde(default)]
    pub args: Vec<String>,
    /// Optional file extensions this provider supports (empty = all).
    #[serde(default)]
    pub extensions: Vec<String>,
}
190
/// A WASM plugin loaded via Extism.
///
/// Plugins must export a `check` function that receives a JSON string
/// `{"text": "...", "language_id": "..."}` and returns a JSON array of diagnostics.
///
/// `name` and `path` are required; `extensions` defaults to an empty list.
///
/// ```yaml
/// engines:
///   wasm_plugins:
///     - name: custom-checker
///       path: .languagecheck/plugins/checker.wasm
///       extensions: [md, html]
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct WasmPlugin {
    /// Display name for this plugin.
    pub name: String,
    /// Path to the `.wasm` file (relative to workspace root or absolute).
    pub path: String,
    /// Optional file extensions this plugin supports (empty = all).
    #[serde(default)]
    pub extensions: Vec<String>,
}
205
206impl Default for EngineConfig {
207    fn default() -> Self {
208        Self {
209            harper: true,
210            languagetool: false,
211            languagetool_url: "http://localhost:8010".to_string(),
212            english_engine: "harper".to_string(),
213            external: Vec::new(),
214            wasm_plugins: Vec::new(),
215        }
216    }
217}
218
/// Settings for a single rule, stored in `Config::rules` under the rule ID.
///
/// ```yaml
/// rules:
///   spelling.typo:
///     severity: warning
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RuleConfig {
    /// Severity override for the rule; `None` when the key is omitted.
    pub severity: Option<String>, // "error", "warning", "info", "hint", "off"
}
223
/// Serde default helper for boolean options that are enabled unless the
/// config file says otherwise.
const fn default_true() -> bool {
    true
}
/// Serde default for `EngineConfig::languagetool_url`.
fn default_lt_url() -> String {
    String::from("http://localhost:8010")
}
/// Serde default for `EngineConfig::english_engine`.
fn default_english_engine() -> String {
    String::from("harper")
}
/// Serde default for `Config::exclude`: dependency, build-output and cache
/// directories, minified/bundled assets, and package-manager lock files.
fn default_exclude() -> Vec<String> {
    const PATTERNS: [&str; 19] = [
        "node_modules/**",
        ".git/**",
        "target/**",
        "dist/**",
        "build/**",
        ".next/**",
        ".nuxt/**",
        "vendor/**",
        "__pycache__/**",
        ".venv/**",
        "venv/**",
        ".tox/**",
        ".mypy_cache/**",
        "*.min.js",
        "*.min.css",
        "*.bundle.js",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
    ];

    PATTERNS
        .iter()
        .map(|pattern| (*pattern).to_owned())
        .collect()
}
256
257impl Config {
258    pub fn load(workspace_root: &Path) -> Result<Self> {
259        // Prefer YAML, fall back to JSON for backward compatibility
260        let yaml_path = workspace_root.join(".languagecheck.yaml");
261        let yml_path = workspace_root.join(".languagecheck.yml");
262        let json_path = workspace_root.join(".languagecheck.json");
263
264        if yaml_path.exists() {
265            let content = std::fs::read_to_string(yaml_path)?;
266            let config: Self = serde_yaml::from_str(&content)?;
267            Ok(config)
268        } else if yml_path.exists() {
269            let content = std::fs::read_to_string(yml_path)?;
270            let config: Self = serde_yaml::from_str(&content)?;
271            Ok(config)
272        } else if json_path.exists() {
273            let content = std::fs::read_to_string(json_path)?;
274            let config: Self = serde_json::from_str(&content)?;
275            Ok(config)
276        } else {
277            Ok(Self::default())
278        }
279    }
280
281    /// Apply user-defined auto-fix rules to the given text, returning the modified text
282    /// and the number of replacements made.
283    #[must_use]
284    pub fn apply_auto_fixes(&self, text: &str) -> (String, usize) {
285        let mut result = text.to_string();
286        let mut total = 0;
287
288        for rule in &self.auto_fix {
289            if let Some(ctx) = &rule.context
290                && !result.contains(ctx.as_str())
291            {
292                continue;
293            }
294            let count = result.matches(&rule.find).count();
295            if count > 0 {
296                result = result.replace(&rule.find, &rule.replace);
297                total += count;
298            }
299        }
300
301        (result, total)
302    }
303}
304
305impl Default for Config {
306    fn default() -> Self {
307        Self {
308            engines: EngineConfig::default(),
309            rules: HashMap::new(),
310            exclude: default_exclude(),
311            auto_fix: Vec::new(),
312            performance: PerformanceConfig::default(),
313            dictionaries: DictionaryConfig::default(),
314            languages: LanguageConfig::default(),
315            workspace: WorkspaceConfig::default(),
316        }
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch workspace directory for tests that exercise `Config::load`.
    ///
    /// The directory name includes the process ID so concurrent test
    /// processes do not share it, and the directory is removed on drop so
    /// cleanup also happens when an assertion panics.
    struct TempWorkspace(std::path::PathBuf);

    impl TempWorkspace {
        /// Create a fresh, empty directory under the system temp directory.
        fn new(name: &str) -> Self {
            let dir = std::env::temp_dir().join(format!("{name}_{}", std::process::id()));
            // Best effort: clear leftovers from an earlier aborted run.
            let _ = std::fs::remove_dir_all(&dir);
            std::fs::create_dir_all(&dir).unwrap();
            Self(dir)
        }

        /// Root of the scratch workspace.
        fn path(&self) -> &std::path::Path {
            &self.0
        }

        /// Write `contents` to `file_name` inside the workspace.
        fn write(&self, file_name: &str, contents: &str) {
            std::fs::write(self.0.join(file_name), contents).unwrap();
        }
    }

    impl Drop for TempWorkspace {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test result.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn default_config_has_harper_enabled_lt_disabled() {
        let config = Config::default();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
    }

    #[test]
    fn default_config_has_standard_excludes() {
        let config = Config::default();
        assert!(config.exclude.contains(&"node_modules/**".to_string()));
        assert!(config.exclude.contains(&".git/**".to_string()));
        assert!(config.exclude.contains(&"target/**".to_string()));
        assert!(config.exclude.contains(&"dist/**".to_string()));
        assert!(config.exclude.contains(&"vendor/**".to_string()));
    }

    #[test]
    fn default_lt_url() {
        let config = Config::default();
        assert_eq!(config.engines.languagetool_url, "http://localhost:8010");
    }

    #[test]
    fn default_english_engine_is_harper() {
        let config = Config::default();
        assert_eq!(config.engines.english_engine, "harper");
    }

    #[test]
    fn english_engine_from_yaml() {
        let yaml = r#"
engines:
  english_engine: languagetool
  languagetool: true
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.english_engine, "languagetool");
    }

    #[test]
    fn load_from_json_string() {
        let json = r#"{
            "engines": { "harper": true, "languagetool": false },
            "rules": { "spelling.typo": { "severity": "warning" } }
        }"#;
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.contains_key("spelling.typo"));
        assert_eq!(
            config.rules["spelling.typo"].severity.as_deref(),
            Some("warning")
        );
    }

    #[test]
    fn load_partial_json_uses_defaults() {
        let json = r#"{}"#;
        let config: Config = serde_json::from_str(json).unwrap();
        assert!(config.engines.harper);
        assert!(!config.engines.languagetool);
        assert!(config.rules.is_empty());
    }

    #[test]
    fn load_from_json_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_json");
        workspace.write(
            ".languagecheck.json",
            r#"{"engines": {"harper": false, "languagetool": true}}"#,
        );

        let config = Config::load(workspace.path()).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn load_from_yaml_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_yaml");
        workspace.write(
            ".languagecheck.yaml",
            "engines:\n  harper: false\n  languagetool: true\n",
        );

        let config = Config::load(workspace.path()).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn load_from_yml_file() {
        let workspace = TempWorkspace::new("lang_check_test_config_yml");
        workspace.write(
            ".languagecheck.yml",
            "engines:\n  harper: false\n  languagetool: true\n",
        );

        let config = Config::load(workspace.path()).unwrap();
        assert!(!config.engines.harper);
        assert!(config.engines.languagetool);
    }

    #[test]
    fn yaml_takes_precedence_over_json() {
        let workspace = TempWorkspace::new("lang_check_test_config_precedence");

        // Write both files with different values
        workspace.write(".languagecheck.yaml", "engines:\n  harper: false\n");
        workspace.write(".languagecheck.json", r#"{"engines": {"harper": true}}"#);

        let config = Config::load(workspace.path()).unwrap();
        // YAML should win
        assert!(!config.engines.harper);
    }

    #[test]
    fn load_missing_file_returns_default() {
        let workspace = TempWorkspace::new("lang_check_test_config_missing");

        let config = Config::load(workspace.path()).unwrap();
        assert!(config.engines.harper);
    }

    #[test]
    fn auto_fix_simple_replacement() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "teh".to_string(),
                replace: "the".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Fix teh typo in teh text.");
        assert_eq!(result, "Fix the typo in the text.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_with_context_filter() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "colour".to_string(),
                replace: "color".to_string(),
                context: Some("American".to_string()),
                description: Some("Use American spelling".to_string()),
            }],
            ..Config::default()
        };
        // Context matches — replacement should happen
        let (result, count) = config.apply_auto_fixes("American English: the colour is red.");
        assert_eq!(result, "American English: the color is red.");
        assert_eq!(count, 1);

        // Context does not match — no replacement
        let (result, count) = config.apply_auto_fixes("British English: the colour is red.");
        assert_eq!(result, "British English: the colour is red.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_no_match() {
        let config = Config {
            auto_fix: vec![AutoFixRule {
                find: "foo".to_string(),
                replace: "bar".to_string(),
                context: None,
                description: None,
            }],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("No matches here.");
        assert_eq!(result, "No matches here.");
        assert_eq!(count, 0);
    }

    #[test]
    fn auto_fix_multiple_rules() {
        let config = Config {
            auto_fix: vec![
                AutoFixRule {
                    find: "recieve".to_string(),
                    replace: "receive".to_string(),
                    context: None,
                    description: None,
                },
                AutoFixRule {
                    find: "seperate".to_string(),
                    replace: "separate".to_string(),
                    context: None,
                    description: None,
                },
            ],
            ..Config::default()
        };
        let (result, count) = config.apply_auto_fixes("Please recieve the seperate package.");
        assert_eq!(result, "Please receive the separate package.");
        assert_eq!(count, 2);
    }

    #[test]
    fn auto_fix_loads_from_yaml() {
        let yaml = r#"
auto_fix:
  - find: "teh"
    replace: "the"
    description: "Fix common typo"
  - find: "colour"
    replace: "color"
    context: "American"
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.auto_fix.len(), 2);
        assert_eq!(config.auto_fix[0].find, "teh");
        assert_eq!(config.auto_fix[0].replace, "the");
        assert_eq!(
            config.auto_fix[0].description.as_deref(),
            Some("Fix common typo")
        );
        assert_eq!(config.auto_fix[1].context.as_deref(), Some("American"));
    }

    #[test]
    fn default_config_has_empty_auto_fix() {
        let config = Config::default();
        assert!(config.auto_fix.is_empty());
    }

    #[test]
    fn external_providers_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  languagetool: false
  external:
    - name: vale
      command: /usr/bin/vale
      args: ["--output", "JSON"]
      extensions: [md, rst]
    - name: custom-checker
      command: ./my-checker
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.external.len(), 2);
        assert_eq!(config.engines.external[0].name, "vale");
        assert_eq!(config.engines.external[0].command, "/usr/bin/vale");
        assert_eq!(config.engines.external[0].args, vec!["--output", "JSON"]);
        assert_eq!(config.engines.external[0].extensions, vec!["md", "rst"]);
        assert_eq!(config.engines.external[1].name, "custom-checker");
        assert!(config.engines.external[1].args.is_empty());
    }

    #[test]
    fn default_config_has_no_external_providers() {
        let config = Config::default();
        assert!(config.engines.external.is_empty());
    }

    #[test]
    fn wasm_plugins_from_yaml() {
        let yaml = r#"
engines:
  harper: true
  wasm_plugins:
    - name: custom-checker
      path: .languagecheck/plugins/checker.wasm
      extensions: [md, html]
    - name: style-linter
      path: /opt/plugins/style.wasm
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(config.engines.wasm_plugins.len(), 2);
        assert_eq!(config.engines.wasm_plugins[0].name, "custom-checker");
        assert_eq!(
            config.engines.wasm_plugins[0].path,
            ".languagecheck/plugins/checker.wasm"
        );
        assert_eq!(
            config.engines.wasm_plugins[0].extensions,
            vec!["md", "html"]
        );
        assert_eq!(config.engines.wasm_plugins[1].name, "style-linter");
        assert!(config.engines.wasm_plugins[1].extensions.is_empty());
    }

    #[test]
    fn default_config_has_no_wasm_plugins() {
        let config = Config::default();
        assert!(config.engines.wasm_plugins.is_empty());
    }

    #[test]
    fn performance_config_defaults() {
        let config = Config::default();
        assert!(!config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 300);
        assert_eq!(config.performance.max_file_size, 0);
    }

    #[test]
    fn performance_config_from_yaml() {
        let yaml = r#"
performance:
  high_performance_mode: true
  debounce_ms: 500
  max_file_size: 1048576
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert!(config.performance.high_performance_mode);
        assert_eq!(config.performance.debounce_ms, 500);
        assert_eq!(config.performance.max_file_size, 1_048_576);
    }

    #[test]
    fn latex_skip_environments_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_environments:
      - prooftree
      - mycustomenv
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_environments,
            vec!["prooftree", "mycustomenv"]
        );
    }

    #[test]
    fn default_config_has_empty_latex_skip_environments() {
        let config = Config::default();
        assert!(config.languages.latex.skip_environments.is_empty());
    }

    #[test]
    fn latex_skip_commands_from_yaml() {
        let yaml = r#"
languages:
  latex:
    skip_commands:
      - codefont
      - myverb
"#;
        let config: Config = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(
            config.languages.latex.skip_commands,
            vec!["codefont", "myverb"]
        );
    }

    #[test]
    fn default_config_has_empty_latex_skip_commands() {
        let config = Config::default();
        assert!(config.languages.latex.skip_commands.is_empty());
    }
}