docolint_types/
lib.rs

1use serde::Serialize;
2
/// A single grammar or spelling problem reported by LanguageTool.
///
/// The struct maps directly from the LanguageTool API response and is used
/// internally to track where the problem is, what it is, and how it may be fixed.
///
/// Equality is total (`Eq`): two errors are equal exactly when all of their
/// fields are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrammarError {
    /// Human-readable description of the error.
    pub message: String,
    /// Byte offset of the error within the plain text (excluding markup segments).
    pub offset: usize,
    /// Length of the problematic text in bytes.
    pub length: usize,
    /// Suggested replacement strings, ordered by preference.
    pub replacements: Vec<String>,
    /// Identifier of the LanguageTool rule that triggered this error.
    pub rule_id: String,
}
20
21/// A segment of text extracted from source code, with metadata for LanguageTool processing.
22///
23/// Segments are either plain prose (checked by LanguageTool) or markup (skipped during
24/// checking but preserved for offset mapping). The `offset` field tracks the segment's
25/// position in the original source file.
26#[derive(Debug, Clone, PartialEq, Serialize)]
27pub struct TextSegment {
28    /// The text content of this segment.
29    pub text: String,
30    /// When `true`, LanguageTool ignores this segment during checking.
31    /// Used for code, HTML tags, markdown delimiters, etc.
32    ///
33    /// Serialized as `"markup"` for LanguageTool API compatibility.
34    #[serde(rename = "markup")]
35    pub is_markup: bool,
36    /// Byte offset of this segment in the original source content.
37    ///
38    /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
39    #[serde(skip)]
40    pub offset: usize,
41    /// LanguageTool check unit identifier for grouping related prose segments.
42    ///
43    /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
44    #[serde(skip)]
45    pub unit_id: usize,
46}
47
48/// A collection of [`TextSegment`]s representing extracted prose from a source file.
49///
50/// This is the primary output of the parser crate. It separates human-readable text
51/// from code/markup, enabling LanguageTool to check only the relevant portions while
52/// maintaining accurate byte offset mappings back to the original file.
53#[derive(Debug, Clone, PartialEq)]
54pub struct AnnotatedText {
55    /// Ordered segments of text extracted from the source.
56    pub segments: Vec<TextSegment>,
57}
58
59impl From<&str> for AnnotatedText {
60    fn from(text: &str) -> Self {
61        AnnotatedText {
62            segments: vec![TextSegment {
63                text: text.to_string(),
64                is_markup: false,
65                offset: 0,
66                unit_id: 0,
67            }],
68        }
69    }
70}
71
72impl AnnotatedText {
73    /// Returns all non-markup segment text concatenated.
74    ///
75    /// Use this to get the plain text string that LanguageTool actually checks.
76    /// Offsets returned by LanguageTool are relative to this string.
77    pub fn plain_text(&self) -> String {
78        self.segments
79            .iter()
80            .filter(|s| !s.is_markup)
81            .map(|s| s.text.as_str())
82            .collect()
83    }
84}
85
#[cfg(test)]
mod tests {
    use super::TextSegment;

    /// The serialized form must contain only `text` and the renamed `markup`
    /// flag; `offset` and `unit_id` are marked `#[serde(skip)]` and must be absent.
    #[test]
    fn text_segment_skips_internal_metadata_in_serde() {
        let expected = serde_json::json!({ "text": "Hello", "markup": false });

        // Non-zero internal fields make an accidental leak visible in the output.
        let segment = TextSegment {
            text: String::from("Hello"),
            is_markup: false,
            offset: 12,
            unit_id: 34,
        };
        let actual = serde_json::to_value(&segment).unwrap();

        assert_eq!(actual, expected);
    }
}
docolint_types/lib.rs

docolint_types/
lib.rs