1#[cfg(not(feature = "std"))]
11use alloc::string::String;
12#[cfg(not(feature = "std"))]
13use alloc::vec;
14#[cfg(not(feature = "std"))]
15use alloc::vec::Vec;
16
17use crate::discourse::ListStyle;
18use crate::refine::{Diagnoser, Diagnostic, RefineConstraint, RenderedDocument};
19use crate::rst::RstRelation;
20use crate::style::StyleProfile;
21
/// Coarse grouping of sentence-opening connectives that play the same rhetorical role.
///
/// The derived `Ord` follows declaration order, which is also the order in which a
/// `BTreeMap` keyed by this type is iterated.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum ConnectorFamily {
    /// Connectives that add to the previous statement ("Additionally,", "Furthermore,", ...).
    Continuation,
    /// Connectives that draw a parallel ("Similarly,", "Likewise,").
    Similarity,
    /// Connectives that set statements against each other ("However,", "Meanwhile,", ...).
    Contrast,
}
30
31fn classify(connective: &str) -> Option<(ConnectorFamily, RstRelation)> {
36 for cont in &["Additionally,", "Furthermore,", "It also"] {
38 if connective.starts_with(cont) {
39 return Some((ConnectorFamily::Continuation, RstRelation::Elaboration));
40 }
41 }
42 for sim in &["Similarly,", "Likewise,"] {
44 if connective.starts_with(sim) {
45 return Some((ConnectorFamily::Similarity, RstRelation::Sequence));
46 }
47 }
48 for con in &["Meanwhile,", "However,", "On the other hand,"] {
50 if connective.starts_with(con) {
51 return Some((ConnectorFamily::Contrast, RstRelation::Contrast));
52 }
53 }
54 for (prefix, rst) in &[
56 ("Because of this,", RstRelation::Cause),
57 ("As a result,", RstRelation::Result),
58 ("Nevertheless,", RstRelation::Concession),
59 ("Then,", RstRelation::Sequence),
60 ("If this happens,", RstRelation::Condition),
61 ("In summary,", RstRelation::Summary),
62 ] {
63 if connective.starts_with(prefix) {
64 let _ = rst;
69 return None;
70 }
71 }
72 None
73}
74
/// Diagnoser configuration: detects one connective opening too many paragraphs.
#[derive(Clone, Debug)]
pub struct ParagraphOpenerMonotony {
    /// A connective is flagged once it opens at least this many paragraphs.
    pub threshold: usize,
    /// Documents with fewer paragraphs than this are not diagnosed at all.
    pub min_paragraphs: usize,
}

impl Default for ParagraphOpenerMonotony {
    /// Flags a connective at three occurrences, in documents of four or more paragraphs.
    fn default() -> Self {
        let threshold = 3;
        let min_paragraphs = 4;
        Self {
            threshold,
            min_paragraphs,
        }
    }
}
95
96impl Diagnoser for ParagraphOpenerMonotony {
97 fn name(&self) -> &'static str {
98 "paragraph_opener_monotony"
99 }
100
101 fn diagnose(
102 &self,
103 document: &RenderedDocument,
104 _profile: Option<&StyleProfile>,
105 ) -> Vec<Diagnostic> {
106 if document.paragraphs.len() < self.min_paragraphs {
107 return Vec::new();
108 }
109 let mut count = alloc::collections::BTreeMap::<String, usize>::new();
116 for paragraph in &document.paragraphs {
117 if let Some(c) = paragraph
118 .sentences
119 .iter()
120 .find_map(|s| s.opening_connective.as_ref())
121 {
122 *count.entry(c.clone()).or_insert(0) += 1;
123 }
124 }
125 let mut diagnostics = Vec::new();
126 for (connective, n) in count {
127 if n >= self.threshold {
128 let severity = (n as f32) / (document.paragraphs.len() as f32);
129 diagnostics.push(Diagnostic {
130 diagnoser: "paragraph_opener_monotony",
131 severity,
132 constraints: vec![RefineConstraint::BlacklistConnective(connective)],
133 });
134 }
135 }
136 diagnostics
137 }
138}
139
/// Diagnoser configuration: detects one list style dominating the most recent list emissions.
#[derive(Clone, Debug)]
pub struct ListStyleFatigue {
    /// A style is flagged once it appears at least this many times inside the window.
    pub threshold: usize,
    /// Number of most recent list-style emissions that are inspected.
    pub window: usize,
    /// Documents with fewer list-style emissions than this are not diagnosed.
    pub min_emissions: usize,
}

impl Default for ListStyleFatigue {
    /// Three repeats within the last four emissions, once at least three emissions exist.
    fn default() -> Self {
        let threshold = 3;
        let window = 4;
        let min_emissions = 3;
        Self {
            threshold,
            window,
            min_emissions,
        }
    }
}
161
162impl Diagnoser for ListStyleFatigue {
163 fn name(&self) -> &'static str {
164 "list_style_fatigue"
165 }
166
167 fn diagnose(
168 &self,
169 document: &RenderedDocument,
170 _profile: Option<&StyleProfile>,
171 ) -> Vec<Diagnostic> {
172 if document.list_styles_used.len() < self.min_emissions {
173 return Vec::new();
174 }
175 let recent_window = document
176 .list_styles_used
177 .iter()
178 .rev()
179 .take(self.window)
180 .collect::<Vec<_>>();
181 let mut count = alloc::collections::BTreeMap::<ListStyle, usize>::new();
182 for u in &recent_window {
183 *count.entry(u.list_style).or_insert(0) += 1;
184 }
185 let mut diagnostics = Vec::new();
186 for (style, n) in count {
187 if n >= self.threshold {
188 let severity = (n as f32) / (recent_window.len() as f32);
189 diagnostics.push(Diagnostic {
190 diagnoser: "list_style_fatigue",
191 severity,
192 constraints: vec![RefineConstraint::BlacklistListStyle(style)],
193 });
194 }
195 }
196 diagnostics
197 }
198}
199
/// Diagnoser configuration: detects one RST relation accounting for too many connectives.
#[derive(Clone, Debug)]
pub struct RstRelationImbalance {
    /// A relation is flagged when its share of classified connectives is strictly above this.
    pub max_share: f32,
    /// Documents with fewer classified connectives than this are not diagnosed.
    pub min_emissions: usize,
}

impl Default for RstRelationImbalance {
    /// Allows a relation up to 60% of the classified connectives, from five connectives on.
    fn default() -> Self {
        let max_share = 0.6;
        let min_emissions = 5;
        Self {
            max_share,
            min_emissions,
        }
    }
}
219
220impl Diagnoser for RstRelationImbalance {
221 fn name(&self) -> &'static str {
222 "rst_relation_imbalance"
223 }
224
225 fn diagnose(
226 &self,
227 document: &RenderedDocument,
228 _profile: Option<&StyleProfile>,
229 ) -> Vec<Diagnostic> {
230 let classified: Vec<(String, RstRelation)> = document
231 .connectives_used
232 .iter()
233 .filter_map(|c| classify(&c.connective).map(|(_, rst)| (c.connective.clone(), rst)))
234 .collect();
235 if classified.len() < self.min_emissions {
236 return Vec::new();
237 }
238 let mut count = alloc::collections::BTreeMap::<RstRelation, Vec<String>>::new();
239 for (text, rst) in &classified {
240 count.entry(*rst).or_default().push(text.clone());
241 }
242 let mut diagnostics = Vec::new();
243 let total = classified.len() as f32;
244 for (_rst, connectives) in count {
245 let share = connectives.len() as f32 / total;
246 if share > self.max_share {
247 let mut occurrence = alloc::collections::BTreeMap::<String, usize>::new();
251 for c in &connectives {
252 *occurrence.entry(c.clone()).or_insert(0) += 1;
253 }
254 let dominant = occurrence
255 .into_iter()
256 .max_by_key(|(_, n)| *n)
257 .map(|(c, _)| c)
258 .unwrap_or_default();
259 diagnostics.push(Diagnostic {
260 diagnoser: "rst_relation_imbalance",
261 severity: share,
262 constraints: vec![RefineConstraint::BlacklistConnective(dominant)],
263 });
264 }
265 }
266 diagnostics
267 }
268}
269
/// Diagnoser configuration: detects sentence lengths that vary too little across a document.
#[derive(Clone, Debug)]
pub struct DocumentScopeRhythm {
    /// The diagnoser fires when the standard deviation of sentence word counts is below this.
    pub min_stdev: f32,
    /// Documents with fewer sentences than this are not diagnosed.
    pub min_sentences: usize,
}

impl Default for DocumentScopeRhythm {
    /// Requires a standard deviation of at least two words, from six sentences on.
    fn default() -> Self {
        let min_stdev = 2.0;
        let min_sentences = 6;
        Self {
            min_stdev,
            min_sentences,
        }
    }
}
290
291impl Diagnoser for DocumentScopeRhythm {
292 fn name(&self) -> &'static str {
293 "document_scope_rhythm"
294 }
295
296 fn diagnose(
297 &self,
298 document: &RenderedDocument,
299 _profile: Option<&StyleProfile>,
300 ) -> Vec<Diagnostic> {
301 if document.sentences.len() < self.min_sentences {
302 return Vec::new();
303 }
304 let lengths: Vec<f32> = document
305 .sentences
306 .iter()
307 .map(|s| s.word_count as f32)
308 .collect();
309 let n = lengths.len() as f32;
310 let mean = lengths.iter().sum::<f32>() / n;
311 let variance = lengths
312 .iter()
313 .map(|x| {
314 let d = x - mean;
315 d * d
316 })
317 .sum::<f32>()
318 / n;
319 let stdev = approx_sqrt(variance);
320 if stdev < self.min_stdev {
321 let target = crate::style::LengthDistribution {
327 short: 0.4,
328 medium: 0.3,
329 long: 0.3,
330 short_max_words: 8,
331 medium_max_words: 18,
332 };
333 return vec![Diagnostic {
334 diagnoser: "document_scope_rhythm",
335 severity: (self.min_stdev - stdev).max(0.0) / self.min_stdev,
336 constraints: vec![RefineConstraint::TightenLengthDistribution(target)],
337 }];
338 }
339 Vec::new()
340 }
341}
342
/// Diagnoser configuration: detects a connector family being used more often than budgeted.
#[derive(Clone, Debug)]
pub struct ConnectiveFamilySaturation {
    /// Number of uses tolerated per connector family; strictly more than this is flagged.
    pub max_per_family: usize,
}

impl Default for ConnectiveFamilySaturation {
    /// Tolerates up to four connectives from the same family.
    fn default() -> Self {
        let max_per_family = 4;
        Self { max_per_family }
    }
}
361
362impl Diagnoser for ConnectiveFamilySaturation {
363 fn name(&self) -> &'static str {
364 "connective_family_saturation"
365 }
366
367 fn diagnose(
368 &self,
369 document: &RenderedDocument,
370 _profile: Option<&StyleProfile>,
371 ) -> Vec<Diagnostic> {
372 let mut by_family = alloc::collections::BTreeMap::<ConnectorFamily, Vec<String>>::new();
373 for u in &document.connectives_used {
374 if let Some((family, _)) = classify(&u.connective) {
375 by_family
376 .entry(family)
377 .or_default()
378 .push(u.connective.clone());
379 }
380 }
381 let mut diagnostics = Vec::new();
382 for (_family, list) in by_family {
383 if list.len() > self.max_per_family {
384 let mut occurrence = alloc::collections::BTreeMap::<String, usize>::new();
385 for c in &list {
386 *occurrence.entry(c.clone()).or_insert(0) += 1;
387 }
388 let dominant = occurrence
389 .into_iter()
390 .max_by_key(|(_, n)| *n)
391 .map(|(c, _)| c)
392 .unwrap_or_default();
393 diagnostics.push(Diagnostic {
394 diagnoser: "connective_family_saturation",
395 severity: (list.len() as f32) / (self.max_per_family as f32),
396 constraints: vec![RefineConstraint::BlacklistConnective(dominant)],
397 });
398 }
399 }
400 diagnostics
401 }
402}
403
/// Diagnoser configuration: detects the observed sentence-length mix drifting away from the
/// mix requested by the style profile.
#[derive(Clone, Debug)]
pub struct ProfileDistributionDrift {
    /// The diagnoser fires when any length bucket's observed share differs from its target
    /// share by strictly more than this.
    pub delta: f32,
}

impl Default for ProfileDistributionDrift {
    /// Tolerates a per-bucket deviation of up to 0.25.
    fn default() -> Self {
        let delta = 0.25;
        Self { delta }
    }
}
419
420impl Diagnoser for ProfileDistributionDrift {
421 fn name(&self) -> &'static str {
422 "profile_distribution_drift"
423 }
424
425 fn diagnose(
426 &self,
427 document: &RenderedDocument,
428 profile: Option<&StyleProfile>,
429 ) -> Vec<Diagnostic> {
430 let Some(profile) = profile else {
431 return Vec::new();
432 };
433 let mut diagnostics = Vec::new();
434
435 if !profile.sentence_length.is_neutral() && !document.sentences.is_empty() {
437 let dist = &profile.sentence_length;
438 let mut counts = [0usize; 3];
439 for sentence in &document.sentences {
440 let bucket = if sentence.word_count <= dist.short_max_words as usize {
441 0
442 } else if sentence.word_count <= dist.medium_max_words as usize {
443 1
444 } else {
445 2
446 };
447 counts[bucket] += 1;
448 }
449 let total = document.sentences.len() as f32;
450 let observed = [
451 counts[0] as f32 / total,
452 counts[1] as f32 / total,
453 counts[2] as f32 / total,
454 ];
455 let target_sum = dist.short + dist.medium + dist.long;
456 if target_sum > 0.0 {
457 let target = [
458 dist.short / target_sum,
459 dist.medium / target_sum,
460 dist.long / target_sum,
461 ];
462 let max_diff = (0..3)
463 .map(|i| (observed[i] - target[i]).abs())
464 .fold(0.0_f32, f32::max);
465 if max_diff > self.delta {
466 diagnostics.push(Diagnostic {
467 diagnoser: "profile_distribution_drift",
468 severity: max_diff,
469 constraints: vec![RefineConstraint::TightenLengthDistribution(
470 dist.clone(),
471 )],
472 });
473 }
474 }
475 }
476
477 diagnostics
478 }
479}
480
/// Square root by Newton's (Heron's) iteration, without relying on `f32::sqrt`.
///
/// Hand-rolled presumably because `f32::sqrt` is unavailable when the crate is built
/// without `std` — confirm before replacing it.
///
/// * Zero and negative inputs yield `0.0`.
/// * NaN and positive infinity are returned unchanged.
/// * Every other input converges to within rounding error of the true square root.
///
/// The iteration starts at `max(x, 1)`, which is never below the true root, so successive
/// guesses only decrease until they reach it. A fixed small number of steps is not enough
/// for very large or very small inputs, hence the loop runs until the guess stops shrinking.
fn approx_sqrt(x: f32) -> f32 {
    // Far more steps than any finite `f32` needs (the guess roughly halves per step while
    // it is far from the root); the cap only guarantees termination.
    const MAX_STEPS: usize = 128;

    if x <= 0.0 {
        return 0.0;
    }
    if x.is_nan() || x == f32::INFINITY {
        // Iterating on these would only manufacture NaN.
        return x;
    }

    let mut guess = if x >= 1.0 { x } else { 1.0 };
    for _ in 0..MAX_STEPS {
        let next = 0.5 * (guess + x / guess);
        if next >= guess {
            // No further progress: converged to within rounding.
            break;
        }
        guess = next;
    }
    guess
}
493
494pub fn default_set() -> Vec<alloc::sync::Arc<dyn Diagnoser>> {
500 use alloc::sync::Arc;
501 vec![
502 Arc::new(ParagraphOpenerMonotony::default()),
503 Arc::new(ListStyleFatigue::default()),
504 Arc::new(RstRelationImbalance::default()),
505 Arc::new(DocumentScopeRhythm::default()),
506 Arc::new(ConnectiveFamilySaturation::default()),
507 Arc::new(ProfileDistributionDrift::default()),
508 ]
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use crate::refine::{EventMeta, ParagraphRender, RenderedDocument};

    /// Builds the metadata of a single rendered event.
    fn event(connective: Option<&str>, list_style: Option<ListStyle>) -> EventMeta {
        EventMeta {
            connective: connective.map(String::from),
            list_style,
        }
    }

    /// One paragraph per entry: a connective-free lead event followed by an event that
    /// carries the given opener (if any).
    fn doc_with_paragraph_openers(openers: &[Option<&str>]) -> RenderedDocument {
        let mut paragraphs = Vec::with_capacity(openers.len());
        for (i, opener) in openers.iter().copied().enumerate() {
            let text = if let Some(o) = opener {
                format!("Lead in para {i}. {o} continuation here.")
            } else {
                format!("Lead in para {i}. Continuation here.")
            };
            paragraphs.push(ParagraphRender {
                text,
                events: vec![event(None, None), event(opener, None)],
            });
        }
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// One single-event paragraph per list style, in the given order.
    fn doc_with_list_styles(styles: &[ListStyle]) -> RenderedDocument {
        let mut paragraphs = Vec::with_capacity(styles.len());
        for (i, style) in styles.iter().enumerate() {
            paragraphs.push(ParagraphRender {
                text: format!("Sentence {i} containing items."),
                events: vec![event(None, Some(*style))],
            });
        }
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// One single-event paragraph per connective, in the given order.
    fn doc_with_connectives(connectives: &[&str]) -> RenderedDocument {
        let mut paragraphs = Vec::with_capacity(connectives.len());
        for (i, c) in connectives.iter().enumerate() {
            paragraphs.push(ParagraphRender {
                text: format!("{c} sentence number {i}."),
                events: vec![event(Some(*c), None)],
            });
        }
        RenderedDocument::from_paragraphs(paragraphs)
    }

    /// One single-sentence paragraph per entry, made of that many repetitions of "word".
    fn doc_with_sentence_lengths(lengths: &[usize]) -> RenderedDocument {
        let mut paragraphs = Vec::with_capacity(lengths.len());
        for &n in lengths {
            let words = vec!["word"; n].join(" ");
            paragraphs.push(ParagraphRender {
                text: format!("{words}."),
                events: vec![EventMeta::default()],
            });
        }
        RenderedDocument::from_paragraphs(paragraphs)
    }

    #[test]
    fn paragraph_opener_monotony_fires_at_threshold() {
        let openers = [
            Some("Additionally,"),
            Some("Additionally,"),
            Some("Additionally,"),
            Some("However,"),
        ];
        let doc = doc_with_paragraph_openers(&openers);
        let found = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert_eq!(found.len(), 1);
        assert!(matches!(
            &found[0].constraints[0],
            RefineConstraint::BlacklistConnective(s) if s.starts_with("Additionally,")
        ));
    }

    #[test]
    fn paragraph_opener_monotony_silent_below_threshold() {
        let openers = [
            Some("Additionally,"),
            Some("Additionally,"),
            Some("Furthermore,"),
            Some("However,"),
        ];
        let doc = doc_with_paragraph_openers(&openers);
        let found = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn paragraph_opener_monotony_silent_for_short_docs() {
        let openers = [Some("Additionally,"), Some("Additionally,")];
        let doc = doc_with_paragraph_openers(&openers);
        let found = ParagraphOpenerMonotony::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn list_style_fatigue_fires_when_one_style_dominates_window() {
        let styles = [
            ListStyle::Including,
            ListStyle::Including,
            ListStyle::Including,
            ListStyle::SuchAs,
        ];
        let doc = doc_with_list_styles(&styles);
        let found = ListStyleFatigue::default().diagnose(&doc, None);
        assert_eq!(found.len(), 1);
        assert!(matches!(
            found[0].constraints[0],
            RefineConstraint::BlacklistListStyle(ListStyle::Including)
        ));
    }

    #[test]
    fn list_style_fatigue_silent_when_diverse() {
        let styles = [
            ListStyle::Including,
            ListStyle::SuchAs,
            ListStyle::Dash,
            ListStyle::Bracketed,
        ];
        let doc = doc_with_list_styles(&styles);
        let found = ListStyleFatigue::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn rst_imbalance_fires_when_one_relation_dominates() {
        let connectives = [
            "Additionally,",
            "Additionally,",
            "Furthermore,",
            "Additionally,",
            "However,",
        ];
        let doc = doc_with_connectives(&connectives);
        let found = RstRelationImbalance::default().diagnose(&doc, None);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn rst_imbalance_silent_when_balanced() {
        let connectives = [
            "Additionally,",
            "Additionally,",
            "However,",
            "However,",
            "Similarly,",
        ];
        let doc = doc_with_connectives(&connectives);
        let found = RstRelationImbalance::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn document_scope_rhythm_fires_when_lengths_are_flat() {
        let doc = doc_with_sentence_lengths(&[10, 10, 10, 10, 10, 10]);
        let found = DocumentScopeRhythm::default().diagnose(&doc, None);
        assert_eq!(found.len(), 1);
        assert!(matches!(
            found[0].constraints[0],
            RefineConstraint::TightenLengthDistribution(_)
        ));
    }

    #[test]
    fn document_scope_rhythm_silent_when_lengths_vary() {
        let doc = doc_with_sentence_lengths(&[3, 12, 5, 18, 7, 14]);
        let found = DocumentScopeRhythm::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn connective_family_saturation_fires_above_budget() {
        let connectives = ["Additionally,"; 5];
        let doc = doc_with_connectives(&connectives);
        let found = ConnectiveFamilySaturation::default().diagnose(&doc, None);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn connective_family_saturation_silent_at_budget() {
        let connectives = ["Additionally,", "Additionally,", "Furthermore,", "It also"];
        let doc = doc_with_connectives(&connectives);
        let found = ConnectiveFamilySaturation::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn profile_drift_silent_without_profile() {
        let doc = doc_with_sentence_lengths(&[3, 5, 7, 9]);
        let found = ProfileDistributionDrift::default().diagnose(&doc, None);
        assert!(found.is_empty());
    }

    #[test]
    fn profile_drift_silent_with_neutral_profile() {
        let doc = doc_with_sentence_lengths(&[3, 5, 7, 9]);
        let neutral = StyleProfile::neutral();
        let found = ProfileDistributionDrift::default().diagnose(&doc, Some(&neutral));
        assert!(found.is_empty());
    }

    #[test]
    fn profile_drift_fires_when_observed_misses_target() {
        // Every sentence is short while the profile asks for long sentences only.
        let doc = doc_with_sentence_lengths(&[3, 4, 5, 4, 3, 5]);
        let long_only = crate::style::LengthDistribution {
            short: 0.0,
            medium: 0.0,
            long: 1.0,
            short_max_words: 8,
            medium_max_words: 18,
        };
        let profile = StyleProfile::builder("long-target")
            .sentence_length(long_only)
            .build()
            .unwrap();
        let found = ProfileDistributionDrift::default().diagnose(&doc, Some(&profile));
        assert_eq!(found.len(), 1);
    }
}