Skip to main content

prosaic_core/
refine_diagnosers.rs

1//! Built-in [`Diagnoser`] implementations for the retrospective refine pass.
2//!
3//! Each diagnoser inspects a [`RenderedDocument`] and emits zero or more
4//! [`Diagnostic`]s when it detects the failure mode it watches for. The
5//! diagnoser set is open — callers can register custom diagnosers via
6//! [`RefineConfig::with_diagnoser`] — but the six built-ins below are
7//! the v1 set the spec calls out and ship enabled by default in
8//! [`RefineConfig::balanced`].
9
10#[cfg(not(feature = "std"))]
11use alloc::string::String;
12#[cfg(not(feature = "std"))]
13use alloc::vec;
14#[cfg(not(feature = "std"))]
15use alloc::vec::Vec;
16
17use crate::discourse::ListStyle;
18use crate::refine::{Diagnoser, Diagnostic, RefineConstraint, RenderedDocument};
19use crate::rst::RstRelation;
20use crate::style::StyleProfile;
21
22// ── Connective → family / RST classification ────────────────────────────
23
/// Connective families that `classify` can assign to a sentence opener.
///
/// The derived `Ord` follows declaration order. `ConnectiveFamilySaturation`
/// keys a `BTreeMap` on this enum, so reordering the variants changes the
/// order in which per-family diagnostics are emitted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
enum ConnectorFamily {
    /// Openers from the continuation pool ("Additionally,", "Furthermore,", "It also").
    Continuation,
    /// Openers from the similarity pool ("Similarly,", "Likewise,").
    Similarity,
    /// Openers from the contrast pool ("Meanwhile,", "However,", "On the other hand,").
    Contrast,
}
30
31/// Map a sentence-leading connective to its (family, RST) pair. Mirrors
32/// the engine's internal pools in `discourse.rs`. If the discourse-side
33/// pools change the mapping must update in sync; the test suite covers
34/// every emitted opener.
35fn classify(connective: &str) -> Option<(ConnectorFamily, RstRelation)> {
36    // Continuation pool (Elaboration in RST terms).
37    for cont in &["Additionally,", "Furthermore,", "It also"] {
38        if connective.starts_with(cont) {
39            return Some((ConnectorFamily::Continuation, RstRelation::Elaboration));
40        }
41    }
42    // Similarity pool (Sequence in RST terms — closest match).
43    for sim in &["Similarly,", "Likewise,"] {
44        if connective.starts_with(sim) {
45            return Some((ConnectorFamily::Similarity, RstRelation::Sequence));
46        }
47    }
48    // Contrast pool.
49    for con in &["Meanwhile,", "However,", "On the other hand,"] {
50        if connective.starts_with(con) {
51            return Some((ConnectorFamily::Contrast, RstRelation::Contrast));
52        }
53    }
54    // Default-Language discourse markers (fall-through bucket).
55    for (prefix, rst) in &[
56        ("Because of this,", RstRelation::Cause),
57        ("As a result,", RstRelation::Result),
58        ("Nevertheless,", RstRelation::Concession),
59        ("Then,", RstRelation::Sequence),
60        ("If this happens,", RstRelation::Condition),
61        ("In summary,", RstRelation::Summary),
62    ] {
63        if connective.starts_with(prefix) {
64            // None of the discourse-marker family classifications matter to
65            // family-saturation diagnosis in v1 — return None for family so
66            // these markers don't count toward continuation/similarity/contrast
67            // budgets, matching the engine's internal accounting.
68            let _ = rst;
69            return None;
70        }
71    }
72    None
73}
74
75// ── ParagraphOpenerMonotony ─────────────────────────────────────────────
76
/// Flags a connective that opens too many of the document's paragraphs.
///
/// A diagnostic is raised when one connective is the opener of at least
/// `threshold` paragraphs. Documents with fewer than `min_paragraphs`
/// paragraphs are skipped, since the pattern is statistically meaningless
/// on short documents. Defaults: 3 shared openers, gated on 4+ paragraphs.
#[derive(Debug, Clone)]
pub struct ParagraphOpenerMonotony {
    /// Number of paragraphs sharing an opener at which the diagnoser fires.
    pub threshold: usize,
    /// Minimum paragraph count a document needs before it is inspected.
    pub min_paragraphs: usize,
}

impl Default for ParagraphOpenerMonotony {
    /// Fires at 3 shared openers, only on documents of 4 or more paragraphs.
    fn default() -> Self {
        let threshold = 3;
        let min_paragraphs = 4;
        Self {
            threshold,
            min_paragraphs,
        }
    }
}
95
96impl Diagnoser for ParagraphOpenerMonotony {
97    fn name(&self) -> &'static str {
98        "paragraph_opener_monotony"
99    }
100
101    fn diagnose(
102        &self,
103        document: &RenderedDocument,
104        _profile: Option<&StyleProfile>,
105    ) -> Vec<Diagnostic> {
106        if document.paragraphs.len() < self.min_paragraphs {
107            return Vec::new();
108        }
109        // The first sentence of a paragraph almost never carries a
110        // connective (the engine's `reset_for_paragraph` clears the
111        // last-template-key so `detect_relation` returns None on the
112        // paragraph's first event). The "paragraph opener" we care about
113        // is the first connective that fires inside the paragraph,
114        // regardless of which sentence emits it.
115        let mut count = alloc::collections::BTreeMap::<String, usize>::new();
116        for paragraph in &document.paragraphs {
117            if let Some(c) = paragraph
118                .sentences
119                .iter()
120                .find_map(|s| s.opening_connective.as_ref())
121            {
122                *count.entry(c.clone()).or_insert(0) += 1;
123            }
124        }
125        let mut diagnostics = Vec::new();
126        for (connective, n) in count {
127            if n >= self.threshold {
128                let severity = (n as f32) / (document.paragraphs.len() as f32);
129                diagnostics.push(Diagnostic {
130                    diagnoser: "paragraph_opener_monotony",
131                    severity,
132                    constraints: vec![RefineConstraint::BlacklistConnective(connective)],
133                });
134            }
135        }
136        diagnostics
137    }
138}
139
140// ── ListStyleFatigue ────────────────────────────────────────────────────
141
/// Flags a `ListStyle` that dominates the document's recent list emissions.
///
/// Only the most recent `window` list-style emissions are inspected; a
/// diagnostic is raised when a single style appears at least `threshold`
/// times among them. Documents with fewer than `min_emissions` list-style
/// emissions are skipped.
#[derive(Debug, Clone)]
pub struct ListStyleFatigue {
    /// Occurrences of one style inside the window at which the diagnoser fires.
    pub threshold: usize,
    /// How many of the most recent emissions are inspected.
    pub window: usize,
    /// Minimum number of list-style emissions before the document is inspected.
    pub min_emissions: usize,
}

impl Default for ListStyleFatigue {
    /// Fires at 3 uses within the last 4 emissions, gated on 3+ emissions.
    fn default() -> Self {
        let threshold = 3;
        let window = 4;
        let min_emissions = 3;
        Self {
            threshold,
            window,
            min_emissions,
        }
    }
}
161
162impl Diagnoser for ListStyleFatigue {
163    fn name(&self) -> &'static str {
164        "list_style_fatigue"
165    }
166
167    fn diagnose(
168        &self,
169        document: &RenderedDocument,
170        _profile: Option<&StyleProfile>,
171    ) -> Vec<Diagnostic> {
172        if document.list_styles_used.len() < self.min_emissions {
173            return Vec::new();
174        }
175        let recent_window = document
176            .list_styles_used
177            .iter()
178            .rev()
179            .take(self.window)
180            .collect::<Vec<_>>();
181        let mut count = alloc::collections::BTreeMap::<ListStyle, usize>::new();
182        for u in &recent_window {
183            *count.entry(u.list_style).or_insert(0) += 1;
184        }
185        let mut diagnostics = Vec::new();
186        for (style, n) in count {
187            if n >= self.threshold {
188                let severity = (n as f32) / (recent_window.len() as f32);
189                diagnostics.push(Diagnostic {
190                    diagnoser: "list_style_fatigue",
191                    severity,
192                    constraints: vec![RefineConstraint::BlacklistListStyle(style)],
193                });
194            }
195        }
196        diagnostics
197    }
198}
199
200// ── RstRelationImbalance ────────────────────────────────────────────────
201
/// Flags an RST relation that accounts for too large a share of the
/// document's classified inter-sentence connectives.
///
/// A diagnostic is raised when one relation's share is strictly greater
/// than `max_share`. The `min_emissions` gate prevents false positives on
/// short documents.
#[derive(Debug, Clone)]
pub struct RstRelationImbalance {
    /// Largest tolerated share (0.0–1.0) for a single RST relation.
    pub max_share: f32,
    /// Minimum number of classified connectives before the document is inspected.
    pub min_emissions: usize,
}

impl Default for RstRelationImbalance {
    /// Tolerates up to 60% for one relation, gated on 5+ classified connectives.
    fn default() -> Self {
        let max_share = 0.6;
        let min_emissions = 5;
        Self {
            max_share,
            min_emissions,
        }
    }
}
219
220impl Diagnoser for RstRelationImbalance {
221    fn name(&self) -> &'static str {
222        "rst_relation_imbalance"
223    }
224
225    fn diagnose(
226        &self,
227        document: &RenderedDocument,
228        _profile: Option<&StyleProfile>,
229    ) -> Vec<Diagnostic> {
230        let classified: Vec<(String, RstRelation)> = document
231            .connectives_used
232            .iter()
233            .filter_map(|c| classify(&c.connective).map(|(_, rst)| (c.connective.clone(), rst)))
234            .collect();
235        if classified.len() < self.min_emissions {
236            return Vec::new();
237        }
238        let mut count = alloc::collections::BTreeMap::<RstRelation, Vec<String>>::new();
239        for (text, rst) in &classified {
240            count.entry(*rst).or_default().push(text.clone());
241        }
242        let mut diagnostics = Vec::new();
243        let total = classified.len() as f32;
244        for (_rst, connectives) in count {
245            let share = connectives.len() as f32 / total;
246            if share > self.max_share {
247                // Blacklist the most-emitted connective in this relation
248                // bucket; that breaks the imbalance without forbidding the
249                // whole RST family (which would over-correct).
250                let mut occurrence = alloc::collections::BTreeMap::<String, usize>::new();
251                for c in &connectives {
252                    *occurrence.entry(c.clone()).or_insert(0) += 1;
253                }
254                let dominant = occurrence
255                    .into_iter()
256                    .max_by_key(|(_, n)| *n)
257                    .map(|(c, _)| c)
258                    .unwrap_or_default();
259                diagnostics.push(Diagnostic {
260                    diagnoser: "rst_relation_imbalance",
261                    severity: share,
262                    constraints: vec![RefineConstraint::BlacklistConnective(dominant)],
263                });
264            }
265        }
266        diagnostics
267    }
268}
269
270// ── DocumentScopeRhythm ─────────────────────────────────────────────────
271
/// Flags a document whose sentence lengths are too uniform.
///
/// A diagnostic is raised when the standard deviation of per-sentence word
/// counts, taken across the whole document, falls below `min_stdev` words.
/// The per-decision rhythm scorer can keep every individual sentence inside
/// a healthy local window while the aggregate still flattens into a
/// monotone cadence; this diagnoser catches that aggregate case.
#[derive(Debug, Clone)]
pub struct DocumentScopeRhythm {
    /// Smallest acceptable standard deviation of sentence length, in words.
    pub min_stdev: f32,
    /// Minimum sentence count before the document is inspected.
    pub min_sentences: usize,
}

impl Default for DocumentScopeRhythm {
    /// Requires a spread of at least 2 words, gated on 6+ sentences.
    fn default() -> Self {
        let min_stdev = 2.0;
        let min_sentences = 6;
        Self {
            min_stdev,
            min_sentences,
        }
    }
}
290
291impl Diagnoser for DocumentScopeRhythm {
292    fn name(&self) -> &'static str {
293        "document_scope_rhythm"
294    }
295
296    fn diagnose(
297        &self,
298        document: &RenderedDocument,
299        _profile: Option<&StyleProfile>,
300    ) -> Vec<Diagnostic> {
301        if document.sentences.len() < self.min_sentences {
302            return Vec::new();
303        }
304        let lengths: Vec<f32> = document
305            .sentences
306            .iter()
307            .map(|s| s.word_count as f32)
308            .collect();
309        let n = lengths.len() as f32;
310        let mean = lengths.iter().sum::<f32>() / n;
311        let variance = lengths
312            .iter()
313            .map(|x| {
314                let d = x - mean;
315                d * d
316            })
317            .sum::<f32>()
318            / n;
319        let stdev = approx_sqrt(variance);
320        if stdev < self.min_stdev {
321            // Tighten the engine's length distribution toward a more
322            // bursty target. The exact target is a wide-spread default;
323            // the v1 retro-pass intentionally doesn't try to be clever
324            // about reading the profile here — that's
325            // ProfileDistributionDrift's job.
326            let target = crate::style::LengthDistribution {
327                short: 0.4,
328                medium: 0.3,
329                long: 0.3,
330                short_max_words: 8,
331                medium_max_words: 18,
332            };
333            return vec![Diagnostic {
334                diagnoser: "document_scope_rhythm",
335                severity: (self.min_stdev - stdev).max(0.0) / self.min_stdev,
336                constraints: vec![RefineConstraint::TightenLengthDistribution(target)],
337            }];
338        }
339        Vec::new()
340    }
341}
342
343// ── ConnectiveFamilySaturation ──────────────────────────────────────────
344
/// Flags a connective family (continuation, similarity, contrast) that is
/// used more often than its document-scope budget allows.
///
/// The engine's existing trailing-window cap limits each family to the size
/// of its pool inside any FAMILY_WINDOW span. This diagnoser instead
/// aggregates across the whole document and fires when a family's
/// *cumulative* count is strictly greater than `max_per_family`.
#[derive(Debug, Clone)]
pub struct ConnectiveFamilySaturation {
    /// Largest number of connectives one family may contribute per document.
    pub max_per_family: usize,
}

impl Default for ConnectiveFamilySaturation {
    /// Allows up to 4 connectives per family.
    fn default() -> Self {
        let max_per_family = 4;
        Self { max_per_family }
    }
}
361
362impl Diagnoser for ConnectiveFamilySaturation {
363    fn name(&self) -> &'static str {
364        "connective_family_saturation"
365    }
366
367    fn diagnose(
368        &self,
369        document: &RenderedDocument,
370        _profile: Option<&StyleProfile>,
371    ) -> Vec<Diagnostic> {
372        let mut by_family = alloc::collections::BTreeMap::<ConnectorFamily, Vec<String>>::new();
373        for u in &document.connectives_used {
374            if let Some((family, _)) = classify(&u.connective) {
375                by_family
376                    .entry(family)
377                    .or_default()
378                    .push(u.connective.clone());
379            }
380        }
381        let mut diagnostics = Vec::new();
382        for (_family, list) in by_family {
383            if list.len() > self.max_per_family {
384                let mut occurrence = alloc::collections::BTreeMap::<String, usize>::new();
385                for c in &list {
386                    *occurrence.entry(c.clone()).or_insert(0) += 1;
387                }
388                let dominant = occurrence
389                    .into_iter()
390                    .max_by_key(|(_, n)| *n)
391                    .map(|(c, _)| c)
392                    .unwrap_or_default();
393                diagnostics.push(Diagnostic {
394                    diagnoser: "connective_family_saturation",
395                    severity: (list.len() as f32) / (self.max_per_family as f32),
396                    constraints: vec![RefineConstraint::BlacklistConnective(dominant)],
397                });
398            }
399        }
400        diagnostics
401    }
402}
403
404// ── ProfileDistributionDrift ───────────────────────────────────────────
405
/// Flags a document whose observed distribution drifts from the target set
/// by a `StyleProfile`. Active only when a profile is supplied.
///
/// A diagnostic is raised when an observed share differs from its target
/// share by more than `delta`. The implementation in this file compares
/// the sentence-length distribution only; list-style and
/// connective-frequency targets are not inspected here.
#[derive(Debug, Clone)]
pub struct ProfileDistributionDrift {
    /// Largest tolerated absolute difference between an observed share and
    /// its target share.
    pub delta: f32,
}

impl Default for ProfileDistributionDrift {
    /// Tolerates a drift of up to 0.25 per bucket.
    fn default() -> Self {
        let delta = 0.25;
        Self { delta }
    }
}
419
420impl Diagnoser for ProfileDistributionDrift {
421    fn name(&self) -> &'static str {
422        "profile_distribution_drift"
423    }
424
425    fn diagnose(
426        &self,
427        document: &RenderedDocument,
428        profile: Option<&StyleProfile>,
429    ) -> Vec<Diagnostic> {
430        let Some(profile) = profile else {
431            return Vec::new();
432        };
433        let mut diagnostics = Vec::new();
434
435        // Length distribution divergence.
436        if !profile.sentence_length.is_neutral() && !document.sentences.is_empty() {
437            let dist = &profile.sentence_length;
438            let mut counts = [0usize; 3];
439            for sentence in &document.sentences {
440                let bucket = if sentence.word_count <= dist.short_max_words as usize {
441                    0
442                } else if sentence.word_count <= dist.medium_max_words as usize {
443                    1
444                } else {
445                    2
446                };
447                counts[bucket] += 1;
448            }
449            let total = document.sentences.len() as f32;
450            let observed = [
451                counts[0] as f32 / total,
452                counts[1] as f32 / total,
453                counts[2] as f32 / total,
454            ];
455            let target_sum = dist.short + dist.medium + dist.long;
456            if target_sum > 0.0 {
457                let target = [
458                    dist.short / target_sum,
459                    dist.medium / target_sum,
460                    dist.long / target_sum,
461                ];
462                let max_diff = (0..3)
463                    .map(|i| (observed[i] - target[i]).abs())
464                    .fold(0.0_f32, f32::max);
465                if max_diff > self.delta {
466                    diagnostics.push(Diagnostic {
467                        diagnoser: "profile_distribution_drift",
468                        severity: max_diff,
469                        constraints: vec![RefineConstraint::TightenLengthDistribution(
470                            dist.clone(),
471                        )],
472                    });
473                }
474            }
475        }
476
477        diagnostics
478    }
479}
480
/// Newton-Raphson `sqrt` approximation. Used in place of `f32::sqrt` so
/// the refine module stays no_std-compatible.
///
/// Returns `0.0` for zero and negative inputs, propagates NaN, and returns
/// `+inf` for `+inf`. For every other input the iteration runs until it
/// stops improving, so the result is accurate to within float rounding
/// across the whole `f32` range rather than only near 1.
fn approx_sqrt(x: f32) -> f32 {
    // Starting from max(x, 1) the guess roughly halves per step until it
    // nears the root, so extreme magnitudes need ~80 steps; 128 is a safe
    // upper bound that is never reached for finite inputs.
    const MAX_STEPS: usize = 128;

    if x.is_nan() || x == f32::INFINITY {
        return x;
    }
    if x <= 0.0 {
        return 0.0;
    }

    // max(x, 1) is always >= sqrt(x), so the iterates approach the root
    // from above and decrease monotonically.
    let mut guess = if x >= 1.0 { x } else { 1.0 };
    for _ in 0..MAX_STEPS {
        let next = 0.5 * (guess + x / guess);
        // Once a step no longer lowers the guess we are within rounding
        // error of the root.
        if next >= guess {
            break;
        }
        guess = next;
    }
    guess
}
493
494// ── Convenience: built-in set ──────────────────────────────────────────
495
496/// Build the v1 default set of six built-in diagnosers as `Arc<dyn Diagnoser>`,
497/// in their canonical order. Use this with
498/// [`crate::RefineConfig::with_diagnoser`] to attach the full pool.
499pub fn default_set() -> Vec<alloc::sync::Arc<dyn Diagnoser>> {
500    use alloc::sync::Arc;
501    vec![
502        Arc::new(ParagraphOpenerMonotony::default()),
503        Arc::new(ListStyleFatigue::default()),
504        Arc::new(RstRelationImbalance::default()),
505        Arc::new(DocumentScopeRhythm::default()),
506        Arc::new(ConnectiveFamilySaturation::default()),
507        Arc::new(ProfileDistributionDrift::default()),
508    ]
509}
510
/// Unit tests for the built-in diagnosers: each diagnoser gets at least one
/// "fires" case and one "stays silent" case, driven by small synthetic
/// documents built through `RenderedDocument::from_paragraphs`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::refine::{EventMeta, ParagraphRender, RenderedDocument};

    /// Builds a document with one two-event paragraph per entry of
    /// `openers`; the second event carries the given connective (if any).
    fn doc_with_paragraph_openers(openers: &[Option<&str>]) -> RenderedDocument {
        // Two events per paragraph so the connective lands at sentence 1
        // (the natural place for an inter-event opener), matching how
        // the engine actually emits connectives.
        let paragraphs: Vec<ParagraphRender> = openers
            .iter()
            .enumerate()
            .map(|(i, opener)| {
                let text = match opener {
                    Some(o) => format!("Lead in para {i}. {o} continuation here."),
                    None => format!("Lead in para {i}. Continuation here."),
                };
                ParagraphRender {
                    text,
                    events: vec![
                        EventMeta {
                            connective: None,
                            list_style: None,
                        },
                        EventMeta {
                            connective: opener.map(|s| s.to_string()),
                            list_style: None,
                        },
                    ],
                }
            })
            .collect();
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// Builds a document with one single-event paragraph per list style.
    fn doc_with_list_styles(styles: &[ListStyle]) -> RenderedDocument {
        // One paragraph per emission, each a single sentence.
        let paragraphs: Vec<ParagraphRender> = styles
            .iter()
            .enumerate()
            .map(|(i, ls)| ParagraphRender {
                text: format!("Sentence {i} containing items."),
                events: vec![EventMeta {
                    connective: None,
                    list_style: Some(*ls),
                }],
            })
            .collect();
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// Builds a document with one single-event paragraph per connective,
    /// each paragraph's text starting with that connective.
    fn doc_with_connectives(connectives: &[&str]) -> RenderedDocument {
        let paragraphs: Vec<ParagraphRender> = connectives
            .iter()
            .enumerate()
            .map(|(i, c)| ParagraphRender {
                text: format!("{c} sentence number {i}."),
                events: vec![EventMeta {
                    connective: Some((*c).to_string()),
                    list_style: None,
                }],
            })
            .collect();
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// Builds a document whose paragraphs are single sentences made of `n`
    /// repetitions of "word", one paragraph per entry of `lengths`.
    fn doc_with_sentence_lengths(lengths: &[usize]) -> RenderedDocument {
        // Pack each length into one paragraph as a single sentence.
        let paragraphs: Vec<ParagraphRender> = lengths
            .iter()
            .map(|&n| {
                let words = (0..n).map(|_| "word").collect::<Vec<_>>().join(" ");
                ParagraphRender {
                    text: format!("{words}."),
                    events: vec![EventMeta::default()],
                }
            })
            .collect();
        RenderedDocument::from_paragraphs(paragraphs)
    }

    // ── ParagraphOpenerMonotony ──────────────────────────────────────────

    // 3 of 4 paragraphs share "Additionally," — exactly the default threshold.
    #[test]
    fn paragraph_opener_monotony_fires_at_threshold() {
        let doc = doc_with_paragraph_openers(&[
            Some("Additionally,"),
            Some("Additionally,"),
            Some("Additionally,"),
            Some("However,"),
        ]);
        let d = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert_eq!(d.len(), 1);
        assert!(matches!(
            &d[0].constraints[0],
            RefineConstraint::BlacklistConnective(s) if s.starts_with("Additionally,")
        ));
    }

    // No opener reaches 3 occurrences, so nothing is reported.
    #[test]
    fn paragraph_opener_monotony_silent_below_threshold() {
        let doc = doc_with_paragraph_openers(&[
            Some("Additionally,"),
            Some("Additionally,"),
            Some("Furthermore,"),
            Some("However,"),
        ]);
        let d = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // Two paragraphs is below the default `min_paragraphs` gate of 4.
    #[test]
    fn paragraph_opener_monotony_silent_for_short_docs() {
        let doc = doc_with_paragraph_openers(&[Some("Additionally,"), Some("Additionally,")]);
        let d = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // ── ListStyleFatigue ─────────────────────────────────────────────────

    // `Including` fills 3 of the 4 emissions in the default window.
    #[test]
    fn list_style_fatigue_fires_when_one_style_dominates_window() {
        let doc = doc_with_list_styles(&[
            ListStyle::Including,
            ListStyle::Including,
            ListStyle::Including,
            ListStyle::SuchAs,
        ]);
        let d = ListStyleFatigue::default().diagnose(&doc, None);
        assert_eq!(d.len(), 1);
        assert!(matches!(
            d[0].constraints[0],
            RefineConstraint::BlacklistListStyle(ListStyle::Including)
        ));
    }

    // Four distinct styles: none reaches the threshold.
    #[test]
    fn list_style_fatigue_silent_when_diverse() {
        let doc = doc_with_list_styles(&[
            ListStyle::Including,
            ListStyle::SuchAs,
            ListStyle::Dash,
            ListStyle::Bracketed,
        ]);
        let d = ListStyleFatigue::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // ── RstRelationImbalance ─────────────────────────────────────────────

    #[test]
    fn rst_imbalance_fires_when_one_relation_dominates() {
        let doc = doc_with_connectives(&[
            "Additionally,", // Elaboration
            "Additionally,",
            "Furthermore,", // Elaboration
            "Additionally,",
            "However,", // Contrast
        ]);
        // 4/5 emissions are Elaboration → above 0.6 threshold.
        let d = RstRelationImbalance::default().diagnose(&doc, None);
        assert_eq!(d.len(), 1);
    }

    // Largest share is 2/5 = 0.4, below the default 0.6 ceiling.
    #[test]
    fn rst_imbalance_silent_when_balanced() {
        let doc = doc_with_connectives(&[
            "Additionally,",
            "Additionally,",
            "However,",
            "However,",
            "Similarly,",
        ]);
        let d = RstRelationImbalance::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // ── DocumentScopeRhythm ──────────────────────────────────────────────

    // Identical lengths give a spread of zero, below the default minimum.
    #[test]
    fn document_scope_rhythm_fires_when_lengths_are_flat() {
        let doc = doc_with_sentence_lengths(&[10, 10, 10, 10, 10, 10]);
        let d = DocumentScopeRhythm::default().diagnose(&doc, None);
        assert_eq!(d.len(), 1);
        assert!(matches!(
            d[0].constraints[0],
            RefineConstraint::TightenLengthDistribution(_)
        ));
    }

    // Widely varying lengths keep the spread above the default minimum.
    #[test]
    fn document_scope_rhythm_silent_when_lengths_vary() {
        let doc = doc_with_sentence_lengths(&[3, 12, 5, 18, 7, 14]);
        let d = DocumentScopeRhythm::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // ── ConnectiveFamilySaturation ───────────────────────────────────────

    #[test]
    fn connective_family_saturation_fires_above_budget() {
        let doc = doc_with_connectives(&[
            "Additionally,",
            "Additionally,",
            "Additionally,",
            "Additionally,",
            "Additionally,", // 5 continuations > 4 budget
        ]);
        let d = ConnectiveFamilySaturation::default().diagnose(&doc, None);
        assert_eq!(d.len(), 1);
    }

    // Exactly 4 continuation connectives: at the budget, not over it.
    #[test]
    fn connective_family_saturation_silent_at_budget() {
        let doc =
            doc_with_connectives(&["Additionally,", "Additionally,", "Furthermore,", "It also"]);
        let d = ConnectiveFamilySaturation::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // ── ProfileDistributionDrift ─────────────────────────────────────────

    // Without a profile there is no target to drift from.
    #[test]
    fn profile_drift_silent_without_profile() {
        let doc = doc_with_sentence_lengths(&[3, 5, 7, 9]);
        let d = ProfileDistributionDrift::default().diagnose(&doc, None);
        assert!(d.is_empty());
    }

    // A neutral profile is expected to report nothing.
    #[test]
    fn profile_drift_silent_with_neutral_profile() {
        let doc = doc_with_sentence_lengths(&[3, 5, 7, 9]);
        let p = StyleProfile::neutral();
        let d = ProfileDistributionDrift::default().diagnose(&doc, Some(&p));
        assert!(d.is_empty());
    }

    #[test]
    fn profile_drift_fires_when_observed_misses_target() {
        // All sentences are short (<= 8 words). Target wants long-leaning.
        let doc = doc_with_sentence_lengths(&[3, 4, 5, 4, 3, 5]);
        let target = crate::style::LengthDistribution {
            short: 0.0,
            medium: 0.0,
            long: 1.0,
            short_max_words: 8,
            medium_max_words: 18,
        };
        let p = StyleProfile::builder("long-target")
            .sentence_length(target)
            .build()
            .unwrap();
        let d = ProfileDistributionDrift::default().diagnose(&doc, Some(&p));
        assert_eq!(d.len(), 1);
    }
}