// docolint_types/lib.rs

1use serde::Serialize;
2
/// Represents a single grammar or spelling error returned by LanguageTool.
///
/// This struct maps directly from the LanguageTool API response and is used
/// internally to track error location, message, and suggested replacements.
///
/// Besides `PartialEq`, the type derives `Eq` and `Hash` (every field is itself
/// `Eq + Hash`), so errors can be deduplicated in a `HashSet` or used as map keys.
// NOTE(review): LanguageTool presumably reports `offset`/`length` in characters
// (UTF-16 code units) rather than bytes; confirm that whoever builds this struct
// converts them before filling the byte-based fields below.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GrammarError {
    /// Human-readable description of the error.
    pub message: String,
    /// Byte offset of the error within the plain text (excluding markup segments).
    pub offset: usize,
    /// Length of the problematic text in bytes.
    pub length: usize,
    /// Suggested replacement strings, ordered by preference.
    pub replacements: Vec<String>,
    /// LanguageTool rule identifier that triggered this error.
    pub rule_id: String,
}
20
21/// A segment of text extracted from source code, with metadata for LanguageTool processing.
22///
23/// Segments are either plain prose (checked by LanguageTool) or markup (skipped during
24/// checking but preserved for offset mapping). The `offset` field tracks the segment's
25/// position in the original source file.
26#[derive(Debug, Clone, PartialEq, Serialize)]
27pub struct TextSegment {
28    /// The text content of this segment.
29    pub text: String,
30    /// When `true`, LanguageTool ignores this segment during checking.
31    /// Used for code, HTML tags, markdown delimiters, etc.
32    ///
33    /// Serialized as `"markup"` for LanguageTool API compatibility.
34    #[serde(rename = "markup")]
35    pub is_markup: bool,
36    /// Byte offset of this segment in the original source content.
37    ///
38    /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
39    #[serde(skip)]
40    pub offset: usize,
41}
42
43/// A collection of [`TextSegment`]s representing extracted prose from a source file.
44///
45/// This is the primary output of the parser crate. It separates human-readable text
46/// from code/markup, enabling LanguageTool to check only the relevant portions while
47/// maintaining accurate byte offset mappings back to the original file.
48#[derive(Debug, Clone, PartialEq)]
49pub struct AnnotatedText {
50    /// Ordered segments of text extracted from the source.
51    pub segments: Vec<TextSegment>,
52}
53
54impl From<&str> for AnnotatedText {
55    fn from(text: &str) -> Self {
56        AnnotatedText {
57            segments: vec![TextSegment {
58                text: text.to_string(),
59                is_markup: false,
60                offset: 0,
61            }],
62        }
63    }
64}
65
66impl AnnotatedText {
67    /// Returns all non-markup segment text concatenated.
68    ///
69    /// Use this to get the plain text string that LanguageTool actually checks.
70    /// Offsets returned by LanguageTool are relative to this string.
71    pub fn plain_text(&self) -> String {
72        self.segments
73            .iter()
74            .filter(|s| !s.is_markup)
75            .map(|s| s.text.as_str())
76            .collect()
77    }
78}
// docolint_types/lib.rs