lang-check 0.4.4

mod proselint;
mod vale;

pub use proselint::ProselintEngine;
pub use vale::ValeEngine;

use crate::checker::{Diagnostic, Severity};
use anyhow::Result;
use extism::{Manifest, Plugin, Wasm};
use harper_core::{
    Dialect, Document, Lrc,
    linting::{LintGroup, Linter},
    parsers::Markdown,
    spell::FstDictionary,
};
use serde::Deserialize;
use std::path::PathBuf;
use tracing::{debug, warn};

/// Common interface implemented by every checker backend in this module.
///
/// `#[async_trait]` is applied so that `check` can be declared `async` on the
/// trait.
#[async_trait::async_trait]
pub trait Engine {
    /// Static identifier of the engine (e.g. `"harper"`, `"languagetool"`).
    fn name(&self) -> &'static str;
    /// Checks `text` and returns the diagnostics the engine produced.
    ///
    /// `language_id` names the language of `text`; implementations in this
    /// file either ignore it or forward it to their backend unchanged.
    /// NOTE(review): callers are expected to pass a BCP-47 tag such as
    /// `"en-US"` — confirm against the orchestrator.
    async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>>;
    /// BCP-47 primary subtags this engine supports. Empty = all languages.
    ///
    /// The default implementation returns an empty list, i.e. the engine is
    /// treated as supporting every language.
    fn supported_languages(&self) -> Vec<&'static str> {
        vec![]
    }
}

/// Reports whether `engine` can handle text tagged with the BCP-47 `lang_tag`.
///
/// Only the primary subtag (everything before the first `-`) is compared, and
/// the comparison ignores ASCII case, so `"en-US"` and `"EN"` both match an
/// engine advertising `"en"`. An engine that advertises no languages at all is
/// treated as supporting every language.
pub fn engine_supports_language(engine: &(dyn Engine + Send), lang_tag: &str) -> bool {
    let advertised = engine.supported_languages();

    // A tag without any '-' is its own primary subtag.
    let primary = match lang_tag.split_once('-') {
        Some((head, _rest)) => head,
        None => lang_tag,
    };

    advertised.is_empty()
        || advertised
            .into_iter()
            .any(|code| primary.eq_ignore_ascii_case(code))
}

/// Maps each Unicode-scalar (`char`) index of `text` to its UTF-8 byte offset.
///
/// Entry `i` is the byte offset at which the `i`-th `char` starts; one extra
/// trailing entry maps the char count to `text.len()` so that exclusive span
/// ends can be looked up too.
///
/// Diagnostics leave this module with UTF-8 byte offsets, while some engines
/// (e.g. Harper, which works on a `Vec<char>`) count in `char`s. Without the
/// conversion, every multi-byte character (em-dash `—`, accented letters, …)
/// ahead of a diagnostic would shift its underline.
fn char_to_byte_table(text: &str) -> Vec<u32> {
    #[allow(clippy::cast_possible_truncation)]
    let narrow = |offset: usize| offset as u32;

    text.char_indices()
        .map(|(offset, _)| offset)
        // Sentinel for the end-of-text index.
        .chain(std::iter::once(text.len()))
        .map(narrow)
        .collect()
}

/// Maps each UTF-16 code-unit index of `text` to a UTF-8 byte offset.
///
/// Needed for engines that report offsets in UTF-16 code units (e.g.
/// `LanguageTool`, a Java service). A character outside the BMP occupies two
/// code units; both of them map to the byte at which that character starts.
/// One extra trailing entry maps the total unit count to `text.len()`.
fn utf16_to_byte_table(text: &str) -> Vec<u32> {
    let mut units: Vec<u32> = Vec::with_capacity(text.len() + 1);

    // Emit the character's starting byte once per UTF-16 unit it occupies.
    #[allow(clippy::cast_possible_truncation)]
    units.extend(
        text.char_indices()
            .flat_map(|(start, ch)| std::iter::repeat(start as u32).take(ch.len_utf16())),
    );

    // Sentinel for the end-of-text index.
    #[allow(clippy::cast_possible_truncation)]
    units.push(text.len() as u32);
    units
}

/// Looks up `idx` in an offset table produced by [`char_to_byte_table`] or
/// [`utf16_to_byte_table`] without ever panicking.
///
/// An index past the end of the table yields the table's last entry (the
/// end-of-text offset); an empty table yields `0`.
fn lookup_offset(table: &[u32], idx: usize) -> u32 {
    match (table.get(idx), table.last()) {
        (Some(&hit), _) => hit,
        // Out of range: clamp to end-of-text.
        (None, Some(&end_of_text)) => end_of_text,
        (None, None) => 0,
    }
}

/// In-process checker backed by `harper_core`.
pub struct HarperEngine {
    /// Curated lint group that is run over every checked document.
    linter: LintGroup,
    /// Dictionary the lint group was built with; also used when parsing each
    /// document.
    dict: Lrc<FstDictionary>,
}

impl HarperEngine {
    /// Builds a Harper engine from `config`.
    ///
    /// `config.dialect` selects the English dialect by exact name
    /// (`"British"`, `"Canadian"`, `"Australian"`); any other value falls back
    /// to American. Every `(rule, enabled)` pair in `config.linters` is then
    /// applied on top of the curated lint group.
    #[must_use]
    pub fn new(config: &crate::config::HarperConfig) -> Self {
        let dict = FstDictionary::curated();

        let dialect = match config.dialect.as_str() {
            "Australian" => Dialect::Australian,
            "Canadian" => Dialect::Canadian,
            "British" => Dialect::British,
            // Unrecognised names default to American English.
            _ => Dialect::American,
        };

        let mut linter = LintGroup::new_curated(dict.clone(), dialect);
        for (rule_name, is_enabled) in &config.linters {
            linter.config.set_rule_enabled(rule_name, *is_enabled);
        }

        Self { linter, dict }
    }
}

#[async_trait::async_trait]
impl Engine for HarperEngine {
    /// Static engine identifier.
    fn name(&self) -> &'static str {
        "harper"
    }

    /// Harper only advertises English.
    fn supported_languages(&self) -> Vec<&'static str> {
        vec!["en"]
    }

    /// Lints `text` as Markdown with Harper and converts every lint into a
    /// [`Diagnostic`] whose span is expressed in UTF-8 byte offsets.
    ///
    /// Each suggestion string is the text that should replace the diagnostic
    /// range:
    /// * `ReplaceWith` – the replacement itself,
    /// * `InsertAfter` – the flagged text followed by the inserted characters,
    /// * `Remove` – the empty string (deletes the range).
    ///
    /// `_language_id` is ignored.
    async fn check(&mut self, text: &str, _language_id: &str) -> Result<Vec<Diagnostic>> {
        let document = Document::new(text, &Markdown::default(), self.dict.as_ref());
        let lints = self.linter.lint(&document);

        // Harper spans are char indices; the protocol wants UTF-8 byte offsets.
        let char_to_byte = char_to_byte_table(text);

        let diagnostics = lints
            .into_iter()
            .map(|lint| {
                let start_byte = lookup_offset(&char_to_byte, lint.span.start);
                let end_byte = lookup_offset(&char_to_byte, lint.span.end);

                // Text currently covered by the diagnostic. `get` (rather than
                // indexing) keeps a malformed span from panicking.
                let flagged = text
                    .get(start_byte as usize..end_byte as usize)
                    .unwrap_or("");

                let suggestions = lint
                    .suggestions
                    .into_iter()
                    .map(|s| match s {
                        harper_core::linting::Suggestion::ReplaceWith(chars) => {
                            chars.into_iter().collect::<String>()
                        }
                        // Suggestions replace the diagnostic range, so an
                        // insertion is expressed as "flagged text + addition".
                        // Emitting a human-readable label here would be pasted
                        // into the document verbatim when applied.
                        harper_core::linting::Suggestion::InsertAfter(chars) => {
                            let mut replacement = String::from(flagged);
                            replacement.extend(chars);
                            replacement
                        }
                        // Empty string replacement = delete the diagnostic range
                        harper_core::linting::Suggestion::Remove => String::new(),
                    })
                    .collect();

                Diagnostic {
                    start_byte,
                    end_byte,
                    message: lint.message,
                    suggestions,
                    rule_id: format!("harper.{:?}", lint.lint_kind),
                    severity: Severity::Warning as i32,
                    unified_id: String::new(), // Will be filled by normalizer
                    confidence: 0.8,
                }
            })
            .collect();

        Ok(diagnostics)
    }
}

/// Checker that delegates to a LanguageTool HTTP server.
pub struct LanguageToolEngine {
    /// Base URL of the server; `/v2/check` is appended for each request.
    url: String,
    /// Sent as the `level` form parameter unless it equals `"default"`.
    level: String,
    /// Sent as the `motherTongue` form parameter when set.
    mother_tongue: Option<String>,
    /// Joined with `,` and sent as `disabledRules` when non-empty.
    disabled_rules: Vec<String>,
    /// Joined with `,` and sent as `enabledRules` when non-empty.
    enabled_rules: Vec<String>,
    /// Joined with `,` and sent as `disabledCategories` when non-empty.
    disabled_categories: Vec<String>,
    /// Joined with `,` and sent as `enabledCategories` when non-empty.
    enabled_categories: Vec<String>,
    /// HTTP client reused across requests.
    client: reqwest::Client,
}

/// Subset of the LanguageTool `/v2/check` JSON response that this module reads.
#[derive(Deserialize)]
struct LTResponse {
    /// One entry per issue reported by the server.
    matches: Vec<LTMatch>,
}

/// A single issue reported by LanguageTool.
#[derive(Deserialize)]
struct LTMatch {
    /// Human-readable description of the issue.
    message: String,
    /// Start of the issue; treated by this module as a UTF-16 code-unit index.
    offset: usize,
    /// Length of the issue, in the same units as `offset`.
    length: usize,
    /// Proposed replacement texts.
    replacements: Vec<LTReplacement>,
    /// Rule that produced the match.
    rule: LTRule,
}

/// One proposed replacement for an [`LTMatch`].
#[derive(Deserialize)]
struct LTReplacement {
    /// Replacement text.
    value: String,
}

/// Rule metadata attached to an [`LTMatch`].
///
/// Fields are deserialized from camelCase keys, so `issue_type` is read from
/// the JSON key `issueType`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct LTRule {
    /// Rule identifier, e.g. `MORFOLOGIK_RULE_EN_US`.
    id: String,
    /// Issue classification (e.g. `"misspelling"`); used to pick a severity.
    issue_type: String,
}

impl LanguageToolEngine {
    /// Builds an engine from `config`, copying all request options out of it.
    ///
    /// The HTTP client uses a 3 s connect timeout and a 10 s overall request
    /// timeout. If the client builder fails, a default `reqwest::Client` is
    /// used instead.
    #[must_use]
    pub fn new(config: &crate::config::LanguageToolConfig) -> Self {
        let connect_limit = std::time::Duration::from_secs(3);
        let request_limit = std::time::Duration::from_secs(10);

        let client = reqwest::Client::builder()
            .connect_timeout(connect_limit)
            .timeout(request_limit)
            .build()
            .unwrap_or_default();

        Self {
            client,
            url: config.url.clone(),
            level: config.level.clone(),
            mother_tongue: config.mother_tongue.clone(),
            enabled_rules: config.enabled_rules.clone(),
            disabled_rules: config.disabled_rules.clone(),
            enabled_categories: config.enabled_categories.clone(),
            disabled_categories: config.disabled_categories.clone(),
        }
    }
}

#[allow(clippy::too_many_lines, clippy::cast_possible_truncation)]
#[async_trait::async_trait]
impl Engine for LanguageToolEngine {
    /// Static engine identifier.
    fn name(&self) -> &'static str {
        "languagetool"
    }

    /// Posts `text` to `<url>/v2/check` and converts the returned matches into
    /// [`Diagnostic`]s with UTF-8 byte spans.
    ///
    /// `language_id` is forwarded verbatim as the `language` form parameter.
    /// Severity is derived from the rule's issue type: `"misspelling"` →
    /// error, `"typographical"` → warning, anything else → information.
    ///
    /// # Errors
    ///
    /// Returns an error when the request cannot be sent, when the server
    /// answers with a non-success status, or when the body is not the expected
    /// JSON.
    async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>> {
        // Tolerate a configured base URL that ends in '/', which would
        // otherwise produce ".../​/v2/check".
        let url = format!("{}/v2/check", self.url.trim_end_matches('/'));

        // language_id is now a BCP-47 tag from the orchestrator (e.g. "en-US", "de-DE")
        let lt_lang = language_id;

        debug!(
            url = %url,
            language = lt_lang,
            text_len = text.len(),
            "LanguageTool request"
        );

        // Optional parameters are only sent when they differ from the server
        // defaults / are non-empty.
        let mut form_params: Vec<(&str, String)> = vec![
            ("text", text.to_string()),
            ("language", lt_lang.to_string()),
        ];
        if self.level != "default" {
            form_params.push(("level", self.level.clone()));
        }
        if let Some(ref mt) = self.mother_tongue {
            form_params.push(("motherTongue", mt.clone()));
        }
        if !self.disabled_rules.is_empty() {
            form_params.push(("disabledRules", self.disabled_rules.join(",")));
        }
        if !self.enabled_rules.is_empty() {
            form_params.push(("enabledRules", self.enabled_rules.join(",")));
        }
        if !self.disabled_categories.is_empty() {
            form_params.push(("disabledCategories", self.disabled_categories.join(",")));
        }
        if !self.enabled_categories.is_empty() {
            form_params.push(("enabledCategories", self.enabled_categories.join(",")));
        }

        let request_start = std::time::Instant::now();
        let response = match self.client.post(&url).form(&form_params).send().await {
            Ok(r) => {
                let status = r.status();
                debug!(
                    status = %status,
                    elapsed_ms = request_start.elapsed().as_millis() as u64,
                    "LanguageTool HTTP response"
                );
                if !status.is_success() {
                    // Include the body: LanguageTool explains rejected
                    // requests in plain text there.
                    let body = r.text().await.unwrap_or_default();
                    warn!(
                        status = %status,
                        body = %body,
                        "LanguageTool returned non-200"
                    );
                    return Err(anyhow::anyhow!("LanguageTool HTTP {status}: {body}"));
                }
                r
            }
            Err(e) => {
                warn!(
                    elapsed_ms = request_start.elapsed().as_millis() as u64,
                    "LanguageTool connection error: {e}"
                );
                return Err(anyhow::anyhow!("LanguageTool connection error: {e}"));
            }
        };

        let res = match response.json::<LTResponse>().await {
            Ok(r) => r,
            Err(e) => {
                warn!("LanguageTool JSON parse error: {e}");
                return Err(anyhow::anyhow!("LanguageTool JSON parse error: {e}"));
            }
        };

        debug!(
            matches = res.matches.len(),
            elapsed_ms = request_start.elapsed().as_millis() as u64,
            "LanguageTool check complete"
        );

        // LanguageTool reports offsets in UTF-16 code units; convert to bytes.
        let utf16_to_byte = utf16_to_byte_table(text);

        let diagnostics = res
            .matches
            .into_iter()
            .map(|m| {
                let severity = match m.rule.issue_type.as_str() {
                    "misspelling" => Severity::Error,
                    "typographical" => Severity::Warning,
                    _ => Severity::Information,
                };

                // Offsets come from the server: saturate instead of risking an
                // overflow panic on a bogus response. `lookup_offset` clamps
                // out-of-range indices to end-of-text.
                let end_unit = m.offset.saturating_add(m.length);

                Diagnostic {
                    start_byte: lookup_offset(&utf16_to_byte, m.offset),
                    end_byte: lookup_offset(&utf16_to_byte, end_unit),
                    message: m.message,
                    suggestions: m.replacements.into_iter().map(|r| r.value).collect(),
                    rule_id: format!("languagetool.{}", m.rule.id),
                    severity: severity as i32,
                    unified_id: String::new(), // Will be filled by normalizer
                    confidence: 0.8,
                }
            })
            .collect();

        Ok(diagnostics)
    }
}

/// An external checker engine that communicates with a subprocess via stdin/stdout JSON.
pub struct ExternalEngine {
    /// Provider name; embedded in rule IDs and log records.
    name: String,
    /// Program to spawn for every check.
    command: String,
    /// Arguments passed to `command`.
    args: Vec<String>,
}

impl ExternalEngine {
    /// Creates an engine that runs `command` with `args` for every check.
    ///
    /// `name` labels the provider in rule IDs and log output. Nothing is
    /// spawned here; the process is started by `check`.
    #[must_use]
    pub const fn new(name: String, command: String, args: Vec<String>) -> Self {
        Self { name, command, args }
    }
}

/// JSON request sent to the external process on stdin.
///
/// Serialized as an object with the keys `text` and `language_id`.
#[derive(serde::Serialize)]
struct ExternalRequest<'a> {
    /// Text to check.
    text: &'a str,
    /// Language identifier passed through from the caller.
    language_id: &'a str,
}

/// JSON diagnostic returned by the external process on stdout.
///
/// Only `start_byte`, `end_byte` and `message` are required; the remaining
/// fields fall back to defaults when absent.
#[derive(Deserialize)]
struct ExternalDiagnostic {
    /// Start of the flagged span; copied unchanged into `Diagnostic::start_byte`.
    start_byte: u32,
    /// End of the flagged span; copied unchanged into `Diagnostic::end_byte`.
    end_byte: u32,
    /// Human-readable description of the issue.
    message: String,
    /// Suggested replacements; empty when omitted.
    #[serde(default)]
    suggestions: Vec<String>,
    /// Provider-specific rule ID; empty when omitted.
    #[serde(default)]
    rule_id: String,
    /// Numeric severity; defaults to `Severity::Warning` when omitted.
    #[serde(default = "default_severity_value")]
    severity: i32,
    /// Confidence score; `0.0` when omitted, which consumers in this file
    /// replace with `0.7`.
    #[serde(default)]
    confidence: f32,
}

/// Serde default for `ExternalDiagnostic::severity`: the numeric value of
/// `Severity::Warning`.
const fn default_severity_value() -> i32 {
    Severity::Warning as i32
}

#[async_trait::async_trait]
impl Engine for ExternalEngine {
    /// Static engine identifier. The configured provider name is not returned
    /// here; it appears in rule IDs and log records instead.
    fn name(&self) -> &'static str {
        "external"
    }

    /// Runs the configured command once, sends `{"text", "language_id"}` as
    /// JSON on its stdin and parses a JSON array of diagnostics from its
    /// stdout.
    ///
    /// Provider failures are treated as "no diagnostics": a spawn failure, a
    /// non-zero exit status, or unparsable output logs a warning and returns
    /// an empty list. Rule IDs are namespaced as `external.<name>[.<rule_id>]`
    /// and a missing/non-positive confidence is replaced by `0.7`.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be serialized or if waiting for
    /// the child process fails.
    async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>> {
        use tokio::io::AsyncWriteExt;
        use tokio::process::Command;

        let request = ExternalRequest { text, language_id };
        let input = serde_json::to_string(&request)?;

        let mut child = match Command::new(&self.command)
            .args(&self.args)
            .stdin(std::process::Stdio::piped())
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
            .spawn()
        {
            Ok(child) => child,
            Err(e) => {
                warn!(provider = %self.name, "Failed to spawn external provider: {e}");
                return Ok(vec![]);
            }
        };

        // Feed stdin while stdout/stderr are being drained. Writing the whole
        // request first would deadlock with a child that fills its output pipe
        // before it has finished reading a large input.
        let stdin = child.stdin.take();
        let feed_stdin = async move {
            if let Some(mut pipe) = stdin {
                // Ignore write errors — the process may exit before reading stdin.
                let _ = pipe.write_all(input.as_bytes()).await;
                let _ = pipe.shutdown().await;
                // `pipe` is dropped here, closing the child's stdin.
            }
        };
        let ((), waited) = tokio::join!(feed_stdin, child.wait_with_output());
        let output = waited?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            warn!(
                provider = %self.name,
                status = %output.status,
                stderr = stderr.trim(),
                "External provider exited with error"
            );
            return Ok(vec![]);
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        let ext_diagnostics: Vec<ExternalDiagnostic> = match serde_json::from_str(&stdout) {
            Ok(d) => d,
            Err(e) => {
                warn!(provider = %self.name, "Failed to parse external provider output: {e}");
                return Ok(vec![]);
            }
        };

        let diagnostics = ext_diagnostics
            .into_iter()
            .map(|ed| {
                // Namespace rule IDs by provider so they cannot collide.
                let rule_id = if ed.rule_id.is_empty() {
                    format!("external.{}", self.name)
                } else {
                    format!("external.{}.{}", self.name, ed.rule_id)
                };
                Diagnostic {
                    start_byte: ed.start_byte,
                    end_byte: ed.end_byte,
                    message: ed.message,
                    suggestions: ed.suggestions,
                    rule_id,
                    severity: ed.severity,
                    unified_id: String::new(),
                    // Providers that omit confidence (serde default 0.0) get 0.7.
                    confidence: if ed.confidence > 0.0 {
                        ed.confidence
                    } else {
                        0.7
                    },
                }
            })
            .collect();

        Ok(diagnostics)
    }
}

/// A WASM checker plugin loaded via Extism.
///
/// The plugin must export a `check` function that accepts a JSON string
/// `{"text": "...", "language_id": "..."}` and returns a JSON array of
/// diagnostics matching the `ExternalDiagnostic` schema.
pub struct WasmEngine {
    /// Plugin name; embedded in rule IDs and log records.
    name: String,
    /// Instantiated Extism plugin whose `check` export is called.
    plugin: Plugin,
}

// SAFETY: Extism Plugin is not Send by default because it wraps a wasmtime Store
// which holds raw pointers. However, we only ever access the plugin from a single
// &mut self call at a time (the Engine trait takes &mut self), so this is safe
// as long as we don't share across threads simultaneously.
//
// NOTE(review): `Send` also allows the engine to be *moved* to another thread
// between calls. Exclusive access via `&mut self` does not by itself rule out
// thread-affine state inside the plugin — confirm against the Extism/wasmtime
// documentation that a `Plugin` may be used from a different thread than the
// one that created it.
unsafe impl Send for WasmEngine {}

impl WasmEngine {
    /// Create a new WASM engine from a `.wasm` file path.
    pub fn new(name: String, wasm_path: PathBuf) -> Result<Self> {
        let wasm = Wasm::file(wasm_path);
        let manifest = Manifest::new([wasm]);
        let plugin = Plugin::new(&manifest, [], true)?;
        Ok(Self { name, plugin })
    }

    /// Create a new WASM engine from raw bytes (useful for testing).
    pub fn from_bytes(name: String, wasm_bytes: &[u8]) -> Result<Self> {
        let wasm = Wasm::data(wasm_bytes.to_vec());
        let manifest = Manifest::new([wasm]);
        let plugin = Plugin::new(&manifest, [], true)?;
        Ok(Self { name, plugin })
    }
}

#[async_trait::async_trait]
impl Engine for WasmEngine {
    /// Static engine identifier. The plugin's own name shows up in rule IDs
    /// and log records instead.
    fn name(&self) -> &'static str {
        "wasm"
    }

    /// Calls the plugin's `check` export with `{"text", "language_id"}` as a
    /// JSON string and converts the JSON array it returns into diagnostics.
    ///
    /// A failing plugin call or unparsable output logs a warning and yields an
    /// empty list rather than an error. Rule IDs are namespaced as
    /// `wasm.<name>[.<rule_id>]`; a missing/non-positive confidence becomes
    /// `0.7`.
    async fn check(&mut self, text: &str, language_id: &str) -> Result<Vec<Diagnostic>> {
        let input = serde_json::json!({
            "text": text,
            "language_id": language_id,
        })
        .to_string();

        // Copy the reply out immediately so the plugin borrow ends here.
        let output = match self.plugin.call::<&str, &str>("check", &input) {
            Ok(reply) => reply.to_string(),
            Err(e) => {
                warn!(plugin = %self.name, "WASM plugin call failed: {e}");
                return Ok(vec![]);
            }
        };

        let parsed: Vec<ExternalDiagnostic> = match serde_json::from_str(&output) {
            Ok(items) => items,
            Err(e) => {
                warn!(plugin = %self.name, "Failed to parse WASM plugin output: {e}");
                return Ok(vec![]);
            }
        };

        let mut diagnostics = Vec::with_capacity(parsed.len());
        for item in parsed {
            // Namespace rule IDs by plugin so they cannot collide.
            let rule_id = if item.rule_id.is_empty() {
                format!("wasm.{}", self.name)
            } else {
                format!("wasm.{}.{}", self.name, item.rule_id)
            };
            // Plugins that omit confidence (serde default 0.0) get 0.7.
            let confidence = if item.confidence > 0.0 {
                item.confidence
            } else {
                0.7
            };
            diagnostics.push(Diagnostic {
                start_byte: item.start_byte,
                end_byte: item.end_byte,
                message: item.message,
                suggestions: item.suggestions,
                rule_id,
                severity: item.severity,
                unified_id: String::new(),
                confidence,
            });
        }

        Ok(diagnostics)
    }
}

/// Discover WASM plugins from a directory (e.g. `.languagecheck/plugins/`).
///
/// Returns a `(name, path)` pair for every regular file in `plugin_dir` whose
/// extension is exactly `wasm`; `name` is the file stem. Directories that
/// merely carry a `.wasm` extension are skipped. The result is sorted so that
/// discovery order does not depend on the platform's directory iteration
/// order.
///
/// A missing or unreadable directory yields an empty list; unreadable
/// individual entries are skipped.
#[must_use]
pub fn discover_wasm_plugins(plugin_dir: &std::path::Path) -> Vec<(String, PathBuf)> {
    let Ok(entries) = std::fs::read_dir(plugin_dir) else {
        return Vec::new();
    };

    let mut plugins: Vec<(String, PathBuf)> = entries
        .filter_map(|entry| {
            let path = entry.ok()?.path();
            let has_wasm_ext = path.extension().is_some_and(|ext| ext == "wasm");
            // `is_file` follows symlinks, so linked plugins are still found.
            if !has_wasm_ext || !path.is_file() {
                return None;
            }
            let name = path
                .file_stem()
                .map(|stem| stem.to_string_lossy().into_owned())
                .unwrap_or_default();
            Some((name, path))
        })
        .collect();

    // `read_dir` makes no ordering guarantee; sort for a stable load order.
    plugins.sort();
    plugins
}

/// Unit tests for the offset tables and the engines in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn char_to_byte_handles_multibyte() {
        // "a—b": 'a'=1 byte, '—'(U+2014)=3 bytes, 'b'=1 byte.
        let table = char_to_byte_table("a—b");
        assert_eq!(table, vec![0, 1, 4, 5]); // char idx 0,1,2 -> bytes; 3 -> len
        assert_eq!(lookup_offset(&table, 2), 4); // 'b' starts at byte 4, not 2
        assert_eq!(lookup_offset(&table, 3), 5); // end-of-text
        assert_eq!(lookup_offset(&table, 99), 5); // clamp
    }

    #[test]
    fn utf16_to_byte_handles_astral() {
        // "a😀b": 'a'=1 byte/1 unit, '😀'(U+1F600)=4 bytes/2 units, 'b'=1 byte.
        let table = utf16_to_byte_table("a😀b");
        // units: 0->'a'@0, 1&2->'😀'@1, 3->'b'@5, 4->end@6
        assert_eq!(table, vec![0, 1, 1, 5, 6]);
        assert_eq!(lookup_offset(&table, 3), 5); // 'b' after surrogate pair
    }

    #[test]
    fn em_dash_does_not_shift_byte_offsets() {
        // A char-index span (Harper-style) for "b" in "a—b" is (2, 3); after
        // conversion it must point at bytes (4, 5), not (2, 3).
        let table = char_to_byte_table("a—b");
        assert_eq!(lookup_offset(&table, 2), 4);
        assert_eq!(lookup_offset(&table, 3), 5);
    }

    #[tokio::test]
    async fn test_harper_engine() -> Result<()> {
        let mut engine = HarperEngine::new(&crate::config::HarperConfig::default());
        let text = "This is an test.";
        let diagnostics = engine.check(text, "en-US").await?;

        // Harper should find "an test" error
        assert!(!diagnostics.is_empty());

        Ok(())
    }

    #[tokio::test]
    async fn harper_offsets_are_bytes_after_em_dash() -> Result<()> {
        // An em-dash before the error must not shift the diagnostic's byte span.
        let mut engine = HarperEngine::new(&crate::config::HarperConfig::default());
        let text = "Some prose — this is an test.";
        let diagnostics = engine.check(text, "en-US").await?;
        assert!(!diagnostics.is_empty(), "Harper should flag 'an test'");

        // Every diagnostic span must land on valid UTF-8 byte boundaries of the
        // ORIGINAL text, be ordered, and stay within the text (char-index spans
        // would fall short by 2 bytes per em-dash and could split the
        // multibyte char).
        for d in &diagnostics {
            let (s, e) = (d.start_byte as usize, d.end_byte as usize);
            assert!(text.is_char_boundary(s), "start {s} not a char boundary");
            assert!(text.is_char_boundary(e), "end {e} not a char boundary");
            assert!(s <= e && e <= text.len(), "span ({s},{e}) out of range");
        }
        Ok(())
    }

    #[tokio::test]
    async fn external_engine_with_echo() -> Result<()> {
        // Use a simple shell command that echoes a valid JSON response
        let mut engine = ExternalEngine::new(
            "test-provider".to_string(),
            "sh".to_string(),
            vec![
                "-c".to_string(),
                r#"cat > /dev/null; echo '[{"start_byte":0,"end_byte":4,"message":"test issue","suggestions":["fix"],"rule_id":"test.rule","severity":2}]'"#.to_string(),
            ],
        );

        let diagnostics = engine.check("some text", "markdown").await?;
        assert_eq!(diagnostics.len(), 1);
        assert_eq!(diagnostics[0].message, "test issue");
        assert_eq!(diagnostics[0].rule_id, "external.test-provider.test.rule");
        assert_eq!(diagnostics[0].suggestions, vec!["fix"]);
        assert_eq!(diagnostics[0].start_byte, 0);
        assert_eq!(diagnostics[0].end_byte, 4);

        Ok(())
    }

    #[tokio::test]
    async fn external_engine_missing_binary() -> Result<()> {
        let mut engine = ExternalEngine::new(
            "nonexistent".to_string(),
            "/nonexistent/binary".to_string(),
            vec![],
        );

        // Should not error, just return empty
        let diagnostics = engine.check("text", "markdown").await?;
        assert!(diagnostics.is_empty());

        Ok(())
    }

    #[tokio::test]
    async fn external_engine_bad_json_output() -> Result<()> {
        let mut engine = ExternalEngine::new(
            "bad-json".to_string(),
            "echo".to_string(),
            vec!["not json".to_string()],
        );

        // Should not error, just return empty
        let diagnostics = engine.check("text", "markdown").await?;
        assert!(diagnostics.is_empty());

        Ok(())
    }

    #[test]
    fn wasm_engine_invalid_bytes_returns_error() {
        let result = WasmEngine::from_bytes("bad-plugin".to_string(), b"not a wasm file");
        assert!(result.is_err());
    }

    #[test]
    fn wasm_engine_missing_file_returns_error() {
        let result = WasmEngine::new(
            "missing".to_string(),
            PathBuf::from("/nonexistent/plugin.wasm"),
        );
        assert!(result.is_err());
    }

    #[test]
    fn discover_wasm_plugins_empty_dir() {
        let dir = std::env::temp_dir().join("lang_check_test_wasm_empty");
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();

        let plugins = discover_wasm_plugins(&dir);
        assert!(plugins.is_empty());

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn discover_wasm_plugins_finds_wasm_files() {
        let dir = std::env::temp_dir().join("lang_check_test_wasm_discover");
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();

        // Create fake .wasm files and a non-wasm file
        std::fs::write(dir.join("checker.wasm"), b"fake").unwrap();
        std::fs::write(dir.join("linter.wasm"), b"fake").unwrap();
        std::fs::write(dir.join("readme.txt"), b"not a plugin").unwrap();

        // Sort locally so the assertions do not depend on discovery order.
        let mut plugins = discover_wasm_plugins(&dir);
        plugins.sort_by(|a, b| a.0.cmp(&b.0));

        assert_eq!(plugins.len(), 2);
        assert_eq!(plugins[0].0, "checker");
        assert_eq!(plugins[1].0, "linter");
        assert!(plugins[0].1.ends_with("checker.wasm"));
        assert!(plugins[1].1.ends_with("linter.wasm"));

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn discover_wasm_plugins_nonexistent_dir() {
        let plugins = discover_wasm_plugins(std::path::Path::new("/nonexistent/dir"));
        assert!(plugins.is_empty());
    }

    /// Live integration test — requires LT Docker on localhost:8010.
    /// Run with: `cargo test lt_engine_live -- --ignored --nocapture`
    #[tokio::test]
    #[ignore = "requires a LanguageTool server on localhost:8010"]
    async fn lt_engine_live() -> Result<()> {
        // Initialize tracing for visible output
        let _ = tracing_subscriber::fmt()
            .with_env_filter("debug")
            .with_writer(std::io::stderr)
            .with_target(false)
            .try_init();

        let mut engine = LanguageToolEngine::new(&crate::config::LanguageToolConfig::default());
        let text = "This is a sentnce with erors.";
        // The engine forwards `language_id` verbatim as LanguageTool's
        // `language` parameter, so it must be a language code, not a file type.
        let diagnostics = engine.check(text, "en-US").await?;

        println!("LT returned {} diagnostics:", diagnostics.len());
        for d in &diagnostics {
            println!(
                "  [{}-{}] {} (rule: {}, suggestions: {:?})",
                d.start_byte, d.end_byte, d.message, d.rule_id, d.suggestions
            );
        }

        assert!(
            diagnostics.len() >= 2,
            "Expected at least 2 spelling errors, got {}",
            diagnostics.len()
        );
        Ok(())
    }

    #[test]
    fn lt_response_deserializes_camel_case() {
        // Real LanguageTool API response (trimmed) — uses camelCase `issueType`
        let json = r#"{
            "matches": [{
                "message": "Possible spelling mistake found.",
                "offset": 10,
                "length": 7,
                "replacements": [{"value": "sentence"}],
                "rule": {
                    "id": "MORFOLOGIK_RULE_EN_US",
                    "description": "Possible spelling mistake",
                    "issueType": "misspelling",
                    "category": {"id": "TYPOS", "name": "Possible Typo"}
                }
            }]
        }"#;
        let res: LTResponse = serde_json::from_str(json).unwrap();
        assert_eq!(res.matches.len(), 1);
        assert_eq!(res.matches[0].rule.id, "MORFOLOGIK_RULE_EN_US");
        assert_eq!(res.matches[0].rule.issue_type, "misspelling");
        assert_eq!(res.matches[0].offset, 10);
        assert_eq!(res.matches[0].length, 7);
        assert_eq!(res.matches[0].replacements[0].value, "sentence");
    }
}