docolint_types/lib.rs
1use serde::Serialize;
2
/// A single grammar or spelling problem reported by LanguageTool.
///
/// Mirrors one entry of the LanguageTool API response and carries what is
/// needed internally: where the problem is, what it is, and how it might be
/// fixed.
///
/// Every field has total equality, so the type derives [`Eq`] in addition to
/// [`PartialEq`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrammarError {
    /// Human-readable description of the error.
    pub message: String,
    /// Byte offset of the error within the plain text (markup segments excluded).
    pub offset: usize,
    /// Length of the problematic text, in bytes.
    pub length: usize,
    /// Suggested replacement strings, most preferred first.
    pub replacements: Vec<String>,
    /// Identifier of the LanguageTool rule that triggered this error.
    pub rule_id: String,
}
20
21/// A segment of text extracted from source code, with metadata for LanguageTool processing.
22///
23/// Segments are either plain prose (checked by LanguageTool) or markup (skipped during
24/// checking but preserved for offset mapping). The `offset` field tracks the segment's
25/// position in the original source file.
26#[derive(Debug, Clone, PartialEq, Serialize)]
27pub struct TextSegment {
28 /// The text content of this segment.
29 pub text: String,
30 /// When `true`, LanguageTool ignores this segment during checking.
31 /// Used for code, HTML tags, markdown delimiters, etc.
32 ///
33 /// Serialized as `"markup"` for LanguageTool API compatibility.
34 #[serde(rename = "markup")]
35 pub is_markup: bool,
36 /// Byte offset of this segment in the original source content.
37 ///
38 /// Skipped during serialization (`#[serde(skip)]`) as it is internal-only.
39 #[serde(skip)]
40 pub offset: usize,
41}
42
43/// A collection of [`TextSegment`]s representing extracted prose from a source file.
44///
45/// This is the primary output of the parser crate. It separates human-readable text
46/// from code/markup, enabling LanguageTool to check only the relevant portions while
47/// maintaining accurate byte offset mappings back to the original file.
48#[derive(Debug, Clone, PartialEq)]
49pub struct AnnotatedText {
50 /// Ordered segments of text extracted from the source.
51 pub segments: Vec<TextSegment>,
52}
53
54impl From<&str> for AnnotatedText {
55 fn from(text: &str) -> Self {
56 AnnotatedText {
57 segments: vec![TextSegment {
58 text: text.to_string(),
59 is_markup: false,
60 offset: 0,
61 }],
62 }
63 }
64}
65
66impl AnnotatedText {
67 /// Returns all non-markup segment text concatenated.
68 ///
69 /// Use this to get the plain text string that LanguageTool actually checks.
70 /// Offsets returned by LanguageTool are relative to this string.
71 pub fn plain_text(&self) -> String {
72 self.segments
73 .iter()
74 .filter(|s| !s.is_markup)
75 .map(|s| s.text.as_str())
76 .collect()
77 }
78}