bito-lint-core 0.1.5

Core library for bito-lint
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
//! Report structs for comprehensive writing analysis.
//!
//! All structs derive `Serialize`, `Deserialize`, and `JsonSchema` for
//! use in both CLI JSON output and MCP tool responses.

use std::collections::HashMap;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::grammar::GrammarReport;
use crate::readability::ReadabilityReport;

/// Full writing analysis report combining all checks.
///
/// Each field holds the result of one check. Every field is an `Option`, and
/// a `None` field is left out of the serialized output entirely instead of
/// being written as `null`.
// NOTE(review): this file does not show what decides whether a check is
// `Some` or `None` (check disabled vs. nothing to report) — confirm in the
// code that builds the report.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FullAnalysisReport {
    /// Readability scoring.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub readability: Option<ReadabilityReport>,
    /// Grammar and passive voice analysis.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grammar: Option<GrammarReport>,
    /// Glue word density per sentence.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sticky_sentences: Option<StickySentencesReport>,
    /// Sentence pacing distribution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pacing: Option<PacingReport>,
    /// Sentence length variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sentence_length: Option<SentenceLengthReport>,
    /// Transition word usage.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transitions: Option<TransitionReport>,
    /// Overused word detection.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub overused_words: Option<OverusedWordsReport>,
    /// Repeated phrase detection.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repeated_phrases: Option<RepeatedPhrasesReport>,
    /// Word proximity repetition.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub echoes: Option<EchoesReport>,
    /// Sensory vocabulary distribution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sensory: Option<SensoryReport>,
    /// Vague word usage.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diction: Option<DictionReport>,
    /// Cliché detection.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cliches: Option<ClichesReport>,
    /// Spelling/hyphenation consistency.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub consistency: Option<ConsistencyReport>,
    /// Acronym frequency.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub acronyms: Option<AcronymReport>,
    /// Business jargon detection.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub jargon: Option<BusinessJargonReport>,
    /// Dense paragraph detection.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub complex_paragraphs: Option<ComplexParagraphsReport>,
    /// Conjunction-starting sentences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub conjunction_starts: Option<ConjunctionStartsReport>,
    /// Style scoring (adverbs, hidden verbs, composite).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style: Option<StyleReport>,
}

// -- Sticky Sentences -------------------------------------------------------

/// Glue word density analysis.
///
/// Carries a text-wide glue index, counts of sticky and semi-sticky
/// sentences, and per-sentence details.
// NOTE(review): the glue-word list and the numeric scale of
// `overall_glue_index` (0–100 vs. 0.0–1.0) are set by the analyzer, not by
// this file — the ">45%" wording suggests 0–100, but confirm.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct StickySentencesReport {
    /// Overall percentage of glue words.
    pub overall_glue_index: f64,
    /// Number of sentences with >45% glue words.
    pub sticky_count: usize,
    /// Number of sentences with 35–45% glue words.
    pub semi_sticky_count: usize,
    /// Details for sticky sentences.
    // NOTE(review): unclear from here whether semi-sticky sentences are also
    // listed, or whether the list is capped — verify against the producer.
    pub sticky_sentences: Vec<StickySentence>,
}

/// A sentence flagged for high glue-word density.
///
/// Element type of `StickySentencesReport::sticky_sentences`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct StickySentence {
    /// Sentence number (1-indexed).
    pub sentence_num: usize,
    /// Percentage of glue words in this sentence.
    pub glue_percentage: f64,
    /// Truncated text (max 100 chars).
    // NOTE(review): the truncation itself happens where this struct is built;
    // nothing in this file enforces the 100-char limit.
    pub text: String,
}

// -- Pacing -----------------------------------------------------------------

/// Sentence pacing distribution.
///
/// Splits sentences into three buckets by word count: fast (<10 words),
/// medium (10–20 words) and slow (>20 words).
// NOTE(review): presumably the three percentages add up to roughly 100 (or
// 1.0, depending on scale) — the scale and rounding are not visible here.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct PacingReport {
    /// Percentage of fast-paced sentences (<10 words).
    pub fast_percentage: f64,
    /// Percentage of medium-paced sentences (10–20 words).
    pub medium_percentage: f64,
    /// Percentage of slow-paced sentences (>20 words).
    pub slow_percentage: f64,
}

// -- Sentence Length --------------------------------------------------------

/// Sentence length variety analysis.
///
/// Summary statistics over sentence lengths plus a list of the sentences
/// flagged as very long.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SentenceLengthReport {
    /// Average sentence length in words.
    pub avg_length: f64,
    /// Standard deviation of sentence lengths.
    pub std_deviation: f64,
    /// Variety score (0–10, higher = more varied).
    // NOTE(review): how the score is derived from the other statistics is not
    // shown in this file.
    pub variety_score: f64,
    /// Shortest sentence length.
    // NOTE(review): presumably measured in words like `avg_length` — confirm.
    pub shortest: usize,
    /// Longest sentence length.
    pub longest: usize,
    /// Sentences with >30 words.
    pub very_long: Vec<LongSentence>,
}

/// A sentence flagged as very long.
///
/// Element type of `SentenceLengthReport::very_long`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct LongSentence {
    /// Sentence number (1-indexed).
    pub sentence_num: usize,
    /// Word count of the sentence.
    pub word_count: usize,
}

// -- Transitions ------------------------------------------------------------

/// Transition word usage analysis.
///
/// Reports how many sentences use transitions, how many transition instances
/// and distinct transitions were found, and which ones occur most often.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct TransitionReport {
    /// Number of sentences containing at least one transition.
    pub sentences_with_transitions: usize,
    /// Percentage of sentences with transitions.
    // NOTE(review): scale (0–100 vs. 0.0–1.0) is not visible here — confirm.
    pub transition_percentage: f64,
    /// Total transition instances.
    pub total_transitions: usize,
    /// Number of distinct transition types.
    pub unique_transitions: usize,
    /// Most common transitions, sorted by frequency.
    // NOTE(review): presumably most frequent first, and possibly capped —
    // the ordering is applied by the producer, not by this type.
    pub most_common: Vec<TransitionCount>,
}

/// A transition with its frequency.
///
/// Element type of `TransitionReport::most_common`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct TransitionCount {
    /// The transition word or phrase.
    pub transition: String,
    /// Number of occurrences.
    pub count: usize,
}

// -- Overused Words ---------------------------------------------------------

/// Overused word detection.
///
/// Lists the words flagged as overused together with the size of the text's
/// vocabulary.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct OverusedWordsReport {
    /// Words appearing with >0.5% frequency.
    // NOTE(review): any stop-word filtering or list cap is decided by the
    // producer and is not visible from this file.
    pub overused_words: Vec<OverusedWord>,
    /// Total distinct words in text.
    pub total_unique_words: usize,
}

/// An overused word with frequency data.
///
/// Element type of `OverusedWordsReport::overused_words`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct OverusedWord {
    /// The word.
    pub word: String,
    /// Occurrence count.
    pub count: usize,
    /// Percentage of total words.
    // NOTE(review): the ">0.5%" threshold documented on the report suggests a
    // 0–100 scale for this value — confirm.
    pub frequency: f64,
}

// -- Repeated Phrases -------------------------------------------------------

/// Repeated phrase detection.
///
/// Holds the total number of repeated phrases and a capped list of the top
/// ones.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RepeatedPhrasesReport {
    /// Total repeated phrases found.
    // NOTE(review): presumably counted before the 50-entry cap, so it can
    // exceed `phrases.len()` — verify against the producer.
    pub total_repeated: usize,
    /// Top repeated phrases (up to 50).
    pub phrases: Vec<RepeatedPhrase>,
}

/// A phrase that appears multiple times.
///
/// Element type of `RepeatedPhrasesReport::phrases`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RepeatedPhrase {
    /// The phrase.
    pub phrase: String,
    /// Number of occurrences.
    pub count: usize,
}

// -- Echoes -----------------------------------------------------------------

/// Word proximity repetition analysis.
///
/// Holds the total number of echoes (a word repeated close to itself) and a
/// capped list of the top ones.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct EchoesReport {
    /// Total echo instances found.
    // NOTE(review): presumably counted before the 50-entry cap, so it can
    // exceed `echoes.len()` — verify against the producer.
    pub total_echoes: usize,
    /// Top echoes (up to 50).
    pub echoes: Vec<Echo>,
}

/// A word repeated within close proximity.
///
/// Element type of `EchoesReport::echoes`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Echo {
    /// The repeated word.
    pub word: String,
    /// Paragraph number (1-indexed).
    pub paragraph: usize,
    /// Words between occurrences.
    // NOTE(review): when a word occurs more than twice it is not visible here
    // which pair of occurrences this distance refers to — confirm.
    pub distance: usize,
    /// Total occurrences in paragraph.
    pub occurrences: usize,
}

// -- Sensory Words ----------------------------------------------------------

/// Sensory vocabulary analysis.
///
/// Gives the overall amount of sensory vocabulary and a per-sense breakdown.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SensoryReport {
    /// Total sensory words found.
    pub sensory_count: usize,
    /// Percentage of all words that are sensory.
    pub sensory_percentage: f64,
    /// Breakdown by sense (sight, sound, touch, smell, taste).
    // `HashMap` iteration order is unspecified, so the order of the sense keys
    // in serialized output is not stable between runs; consumers should look
    // entries up by key rather than rely on position.
    // NOTE(review): the exact key spellings and whether senses with zero
    // matches are present are decided by the producer — confirm.
    pub by_sense: HashMap<String, SenseData>,
}

/// Data for a single sense category.
///
/// Value type of the `SensoryReport::by_sense` map.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SenseData {
    /// Number of words matching this sense.
    pub count: usize,
    /// Percentage of sensory words from this sense.
    pub percentage: f64,
}

// -- Diction ----------------------------------------------------------------

/// Vague word analysis.
///
/// Counts vague-word usage in total and by distinct word, and lists the most
/// frequent ones.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DictionReport {
    /// Total vague word occurrences.
    pub total_vague: usize,
    /// Number of distinct vague words used.
    pub unique_vague: usize,
    /// Most common vague words, sorted by count.
    // NOTE(review): presumably highest count first, and possibly capped —
    // the ordering is applied by the producer, not by this type.
    pub most_common: Vec<VagueWordCount>,
}

/// A vague word with its count.
///
/// Element type of `DictionReport::most_common`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct VagueWordCount {
    /// The vague word.
    pub word: String,
    /// Occurrence count.
    pub count: usize,
}

// -- Clichés ----------------------------------------------------------------

/// Cliché detection.
///
/// Holds the total number of cliché instances and the clichés that were
/// found, each with its own count.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ClichesReport {
    /// Total cliché instances.
    // NOTE(review): presumably the sum of the per-cliché counts below —
    // verify against the producer.
    pub total_cliches: usize,
    /// Clichés found.
    pub cliches: Vec<ClicheFound>,
}

/// A cliché found in the text.
///
/// Element type of `ClichesReport::cliches`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ClicheFound {
    /// The cliché phrase.
    pub cliche: String,
    /// Number of occurrences.
    pub count: usize,
}

// -- Consistency ------------------------------------------------------------

/// Spelling and formatting consistency.
///
/// Issues are reported as free-form strings rather than structured records.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ConsistencyReport {
    /// Active dialect, if enforced (e.g., "en-us", "en-gb").
    // Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dialect: Option<String>,
    /// Total inconsistency issues.
    // NOTE(review): presumably equal to `issues.len()` — nothing in this file
    // enforces that; verify against the producer.
    pub total_issues: usize,
    /// Human-readable issue descriptions.
    pub issues: Vec<String>,
}

// -- Acronyms ---------------------------------------------------------------

/// Acronym usage analysis.
///
/// Counts acronym usage in total and by distinct acronym, and lists each
/// acronym with its frequency.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AcronymReport {
    /// Total acronym instances.
    pub total_acronyms: usize,
    /// Number of distinct acronyms.
    pub unique_acronyms: usize,
    /// Acronyms sorted by frequency.
    // NOTE(review): presumably most frequent first — the ordering is applied
    // by the producer, not by this type.
    pub acronym_list: Vec<AcronymCount>,
}

/// An acronym with its frequency.
///
/// Element type of `AcronymReport::acronym_list`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AcronymCount {
    /// The acronym.
    pub acronym: String,
    /// Number of occurrences.
    pub count: usize,
}

// -- Business Jargon --------------------------------------------------------

/// Business jargon detection.
///
/// Counts jargon usage in total and by distinct term, and lists each term
/// with its frequency.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct BusinessJargonReport {
    /// Total jargon instances.
    pub total_jargon: usize,
    /// Number of distinct jargon terms.
    pub unique_jargon: usize,
    /// Jargon found, sorted by frequency.
    // NOTE(review): presumably most frequent first — the ordering is applied
    // by the producer, not by this type.
    pub jargon_list: Vec<JargonFound>,
}

/// A jargon term found in the text.
///
/// Element type of `BusinessJargonReport::jargon_list`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct JargonFound {
    /// The jargon word or phrase.
    pub jargon: String,
    /// Number of occurrences.
    pub count: usize,
}

// -- Complex Paragraphs -----------------------------------------------------

/// Dense paragraph analysis.
///
/// Reports how many paragraphs were flagged as complex, what share of all
/// paragraphs that is, and the metrics for each flagged paragraph.
// NOTE(review): the thresholds that make a paragraph "complex" live in the
// analyzer and are not visible from this file.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ComplexParagraphsReport {
    /// Number of complex paragraphs.
    pub complex_count: usize,
    /// Percentage of paragraphs that are complex.
    pub percentage: f64,
    /// Details for each complex paragraph.
    pub complex_paragraphs: Vec<ComplexParagraph>,
}

/// A paragraph flagged as complex.
///
/// Element type of `ComplexParagraphsReport::complex_paragraphs`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ComplexParagraph {
    /// Paragraph number (1-indexed).
    pub paragraph_num: usize,
    /// Average sentence length in words.
    pub avg_sentence_length: f64,
    /// Average syllables per word.
    pub avg_syllables: f64,
}

// -- Conjunction Starts -----------------------------------------------------

/// Conjunction-starting sentence analysis.
///
/// Only aggregate figures are carried; the individual sentences are not
/// listed.
// NOTE(review): which words count as conjunctions, and the scale of
// `percentage` (0–100 vs. 0.0–1.0), are decided by the analyzer — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ConjunctionStartsReport {
    /// Number of sentences starting with a conjunction.
    pub count: usize,
    /// Percentage of total sentences.
    pub percentage: f64,
}

// -- Style ------------------------------------------------------------------

/// Style analysis: adverbs, hidden verbs, and composite score.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct StyleReport {
    /// Count of adverbs (words ending in -ly).
    pub adverb_count: usize,
    /// Hidden verbs found (noun forms that should be verbs).
    pub hidden_verbs: Vec<HiddenVerbSuggestion>,
    /// Composite style score (0–100).
    // NOTE(review): the field is a signed `i32`, so the type itself allows
    // values outside 0–100 (including negatives); the documented range holds
    // only if the producer clamps — confirm there. How the score is composed
    // from the other fields is not visible from this file.
    pub style_score: i32,
}

/// A hidden verb suggestion.
///
/// Pairs a noun form found in the text with the verb to use instead.
/// Element type of `StyleReport::hidden_verbs`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct HiddenVerbSuggestion {
    /// The noun form found.
    pub noun: String,
    /// The verb form to use instead.
    pub verb: String,
    /// Number of occurrences.
    pub count: usize,
}