// docolint_types/lib.rs
1use serde::Serialize;
2
/// A single grammar or spelling problem reported by LanguageTool.
///
/// The fields mirror the LanguageTool API response and are used internally to
/// track where the problem is, what it is, and how it might be fixed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrammarError {
    /// Human-readable description of the error.
    pub message: String,
    /// Byte offset of the error within the plain text (excluding markup segments).
    pub offset: usize,
    /// Length of the problematic text in bytes.
    pub length: usize,
    /// Suggested replacement strings, ordered by preference.
    pub replacements: Vec<String>,
    /// Identifier of the LanguageTool rule that triggered this error.
    pub rule_id: String,
}
20
21/// A segment of text extracted from source code, with metadata for LanguageTool processing.
22///
23/// Segments are either plain prose (checked by LanguageTool) or markup (skipped during
24/// checking but preserved for offset mapping). The `offset` field tracks the segment's
25/// position in the original source file.
26#[derive(Debug, Clone, PartialEq, Serialize)]
27pub struct TextSegment {
28 /// The text content of this segment.
29 pub text: String,
30 /// When `true`, LanguageTool ignores this segment during checking.
31 /// Used for code, HTML tags, markdown delimiters, etc.
32 ///
33 /// Serialized as `"markup"` for LanguageTool API compatibility.
34 #[serde(rename = "markup")]
35 pub is_markup: bool,
36 /// Byte offset of this segment in the original source content.
37 ///
38 /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
39 #[serde(skip)]
40 pub offset: usize,
41 /// LanguageTool check unit identifier for grouping related prose segments.
42 ///
43 /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
44 #[serde(skip)]
45 pub unit_id: usize,
46}
47
48/// A collection of [`TextSegment`]s representing extracted prose from a source file.
49///
50/// This is the primary output of the parser crate. It separates human-readable text
51/// from code/markup, enabling LanguageTool to check only the relevant portions while
52/// maintaining accurate byte offset mappings back to the original file.
53#[derive(Debug, Clone, PartialEq)]
54pub struct AnnotatedText {
55 /// Ordered segments of text extracted from the source.
56 pub segments: Vec<TextSegment>,
57}
58
59impl From<&str> for AnnotatedText {
60 fn from(text: &str) -> Self {
61 AnnotatedText {
62 segments: vec![TextSegment {
63 text: text.to_string(),
64 is_markup: false,
65 offset: 0,
66 unit_id: 0,
67 }],
68 }
69 }
70}
71
72impl AnnotatedText {
73 /// Returns all non-markup segment text concatenated.
74 ///
75 /// Use this to get the plain text string that LanguageTool actually checks.
76 /// Offsets returned by LanguageTool are relative to this string.
77 pub fn plain_text(&self) -> String {
78 self.segments
79 .iter()
80 .filter(|s| !s.is_markup)
81 .map(|s| s.text.as_str())
82 .collect()
83 }
84}
85
#[cfg(test)]
mod tests {
    use super::TextSegment;

    /// Only `text` and the renamed `markup` flag should appear in the JSON;
    /// `offset` and `unit_id` are marked `#[serde(skip)]`.
    #[test]
    fn text_segment_skips_internal_metadata_in_serde() {
        let expected = serde_json::json!({ "text": "Hello", "markup": false });

        let serialized = serde_json::to_value(&TextSegment {
            text: String::from("Hello"),
            is_markup: false,
            offset: 12,
            unit_id: 34,
        })
        .unwrap();

        assert_eq!(serialized, expected);
    }
}