// prosaic_core/discourse.rs
1#[cfg(not(feature = "std"))]
2use alloc::string::{String, ToString};
3#[cfg(not(feature = "std"))]
4use alloc::vec::Vec;
5
6use crate::collections::{HashMap, HashSet, VecDeque, new_map, new_set};
7
/// One entry of an utterance's forward-looking center list (Cf): an entity
/// realized in the utterance, paired with its salience rank.
///
/// Ranks follow grammatical role and a smaller number means a more prominent
/// entity: 0 is the Subject, 1 the Object, 2 the Indirect Object / Location,
/// and 3 or above an Oblique.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Cf {
    /// Name of the entity, exactly as it was handed to `mention_entity` or
    /// `mention_entity_ranked`.
    pub name: String,
    /// Salience rank derived from grammatical role. 0 (Subject) is the most
    /// prominent; larger values are progressively less prominent.
    pub rank: u8,
}
21
/// How the center of attention moves between two consecutive utterances,
/// following Centering Theory (Grosz, Joshi & Weinstein, 1995).
///
/// From most to least coherent: `Continue`, `Retain`, `SmoothShift`,
/// `RoughShift`. `NoCb` is the "nothing to classify" value used for the first
/// render, right after a reset, or for an utterance that realizes no entities.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Transition {
    /// Cb(n) == Cb(n−1) and Cb(n) == Cp(n): the entity in focus stays in
    /// focus. The most coherent transition.
    Continue,
    /// Cb(n) == Cb(n−1) but Cb(n) != Cp(n): the center is kept, yet it is no
    /// longer the most salient entity of the utterance.
    Retain,
    /// Cb(n) != Cb(n−1) but Cb(n) == Cp(n): attention moves cleanly onto a
    /// new center.
    SmoothShift,
    /// Cb(n) != Cb(n−1) and Cb(n) != Cp(n): the least coherent kind of shift.
    RoughShift,
    /// Nothing could be classified: first render, post-reset, or an
    /// utterance without entities.
    NoCb,
}
43
/// Private word interner: hands every distinct word a stable `u32` id.
///
/// The interner stores and compares words exactly as given; it performs no
/// case folding of its own. Callers must lowercase a word before passing it
/// to `intern` or `get`, otherwise differently-cased spellings receive
/// different ids.
#[derive(Debug, Clone, Default)]
struct WordInterner {
    /// Word → id lookup.
    by_word: HashMap<String, u32>,
    /// Reverse map kept for debugging: the word for id `i` lives at index `i`.
    by_id: Vec<String>,
}

impl WordInterner {
    /// Return the id of `word`, assigning the next free id the first time the
    /// word is seen. Ids are dense and handed out from 0 in first-seen order.
    ///
    /// # Panics
    ///
    /// Panics if the `u32` id space is exhausted. Without this guard the
    /// `as u32` conversion would wrap and silently alias earlier ids.
    fn intern(&mut self, word: &str) -> u32 {
        if let Some(&id) = self.by_word.get(word) {
            return id;
        }
        let next = self.by_id.len();
        assert!(
            next <= u32::MAX as usize,
            "word interner exhausted the u32 id space"
        );
        // Lossless: guarded by the assertion above.
        let id = next as u32;
        // The word is owned twice on purpose: once as the map key and once in
        // the reverse table.
        let owned = word.to_string();
        self.by_word.insert(owned.clone(), id);
        self.by_id.push(owned);
        id
    }

    /// Look up the id of `word` without interning it. Returns `None` when
    /// the word has never been passed to `intern`.
    fn get(&self, word: &str) -> Option<u32> {
        self.by_word.get(word).copied()
    }
}
71
72/// Tracks discourse state across multiple render calls for natural output.
73///
74/// This is the engine's internal memory — it knows what entities were recently
75/// mentioned, what templates were recently used, what connectives were recently
76/// inserted, and what words appeared in recent output.
77#[derive(Debug, Clone)]
78pub struct DiscourseState {
79 /// Tracks entities by name → (entity_type, render_index_of_last_mention).
80 entities: HashMap<String, EntityMention>,
81
82 /// The current render index (incremented each render call).
83 render_index: usize,
84
85 /// The name of the most recently mentioned entity (for pronoun resolution).
86 focus_entity: Option<String>,
87
88 /// Last template variant index used per template key (for anti-repeat).
89 template_history: HashMap<String, usize>,
90
91 /// Recently used discourse connectives (ring buffer, max 6).
92 connective_history: VecDeque<String>,
93
94 /// Per-decision family slot for connective selection: `Some(family)`
95 /// when a connective was emitted, `None` when the family budget
96 /// suppressed one so the sentence ran plain. Tracked alongside
97 /// `connective_history` but including null slots so dense
98 /// same-family runs can be detected even when exact strings differ.
99 connective_family_history: VecDeque<Option<ConnectorFamily>>,
100
101 /// The template key used in the previous render (for relationship detection).
102 last_template_key: Option<String>,
103
104 /// The primary entity name from the previous render.
105 last_entity_name: Option<String>,
106
107 /// Non-stopword tokens from recent renders, with render_index.
108 /// Words are stored as interned `u32` ids — see `interner`.
109 /// Kept for a window of the last 5 renders.
110 word_history: VecDeque<(usize, HashSet<u32>)>,
111
112 /// Word counts from recently emitted sentences. Used to avoid a flat
113 /// mid-length cadence when multiple template variants are available.
114 sentence_length_history: VecDeque<usize>,
115
116 /// Word interner shared across all render history. Lowercasing happens
117 /// once at intern time; all subsequent lookups use pre-lowercased ids.
118 interner: WordInterner,
119
120 /// Pre-interned ids for every stopword in `STOPWORDS`. Populated once
121 /// during construction so `record_output_words` never scans strings.
122 stopword_ids: HashSet<u32>,
123
124 /// Monotonic cycle index used by [`Self::next_list_style`]. The selected
125 /// style is found by walking `LIST_STYLES` from this index forward,
126 /// skipping any style currently in `recent_list_styles`. The index is
127 /// advanced past the picked slot.
128 ///
129 /// Persists across paragraph-boundary resets so consecutive paragraphs
130 /// rotate through the list-style pool instead of restarting at the same
131 /// phrasing every time. This mirrors the cross-paragraph semantics of
132 /// `Session::last_temporal_anchor`. Use [`Self::reset_list_cycle`] (or
133 /// the [`DiscourseState::reset`] hard reset) to clear it.
134 last_list_style: usize,
135
136 /// Trailing window of recently chosen list styles, capped at
137 /// [`LIST_STYLE_RECENT_WINDOW`]. Both auto-picked and explicitly forced
138 /// styles are recorded here so the next auto pick deterministically
139 /// avoids them. Persists across paragraph resets alongside
140 /// `last_list_style`; cleared by [`Self::reset_list_cycle`] and the
141 /// full [`Self::reset`].
142 recent_list_styles: VecDeque<ListStyle>,
143
144 /// Whether the current focus is a compound/plural subject, so pronoun
145 /// continuations should use "they/them" instead of "it".
146 focus_is_plural: bool,
147
148 /// Backward-looking center for the NEXT render. Updated at the end of each
149 /// successful render via `advance_cb`. `None` before the first render, after
150 /// a reset, or when no coherent transition is available (Rough Shift).
151 cb: Option<String>,
152
153 /// Focus entity of the render immediately before the current one. Used to
154 /// compute Cb transitions. Different from `focus_entity`: that tracks the
155 /// current render's focus; this tracks what `focus_entity` was at the point
156 /// `advance_cb` was last called.
157 previous_focus: Option<String>,
158
159 /// Forward-looking centers being built during the CURRENT render.
160 /// Populated by `mention_entity_ranked`, cleared by `begin_render`.
161 /// Ordered by rank ascending (lowest rank first); ties broken by insertion
162 /// order. The first element is the Cp (preferred center).
163 current_cf: Vec<Cf>,
164
165 /// Forward-looking centers from the PREVIOUS render. Set by
166 /// `compute_cb_transition` as a snapshot of `current_cf`. Used to
167 /// identify the Cb as the highest-ranked Cf member shared with the
168 /// previous utterance.
169 previous_cf: Vec<Cf>,
170
171 /// Transition classification computed by the most recent `advance_cb`
172 /// call. `Transition::NoCb` before any render or after a reset.
173 last_transition: Transition,
174
175 /// List style chosen by the most recent `|join` pipe during the
176 /// current render. `None` when no `|join` fired. Cleared at the
177 /// start of every render so [`RenderExplanation`] always reports
178 /// the value for *this* render.
179 last_list_style_used: Option<ListStyle>,
180
181 /// Whether the most recent render's Silent-mode cleanup stripped
182 /// any trailing orphan words. Cleared at the start of every render.
183 /// Exposed via [`RenderExplanation::cleanup_stripped_tail`].
184 last_cleanup_stripped_tail: bool,
185}
186
/// Bookkeeping record for one entity tracked by `DiscourseState`.
#[derive(Debug, Clone)]
struct EntityMention {
    /// Entity type supplied with the most recent mention (it is overwritten
    /// on every mention).
    entity_type: String,
    /// Render index at which the entity was last mentioned.
    last_mentioned: usize,
    /// Number of mentions recorded for this entity so far.
    mention_count: usize,
}
193
/// The surface form chosen for referring to an entity, given what the
/// discourse already contains.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ReferenceForm {
    /// Full introduction, e.g. "The class UserService".
    Full,
    /// Bare name, e.g. "UserService".
    ShortName,
    /// Personal pronoun: "it" / "they" / (lang-specific).
    Pronoun,
    /// Demonstrative determiner plus type: "this class" / (lang-specific).
    /// A reserved slot for future discourse rules;
    /// `DiscourseState::reference_form` does not emit it today.
    Demonstrative,
    /// Possessive pronoun/determiner: "its" / "their" / (lang-specific).
    /// Produced by the `{name|possessive}` pipe once the standard discourse
    /// policy has decided that a pronoun-form reference is appropriate.
    Possessive,
    /// Zero realization: the slot emits nothing. Intended for pro-drop
    /// languages (Japanese, colloquial Spanish/Italian) where the pronoun is
    /// recoverable from context. The default
    /// `DiscourseState::reference_form` does not emit it; language-specific
    /// discourse extensions may.
    Zero,
}
219
/// How the current render relates to the one immediately before it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiscourseRelation {
    /// Both renders are about the same entity but describe different actions.
    SameEntityDifferentAction,
    /// The renders are about different entities but share an action type.
    DifferentEntitySameAction,
    /// The two actions contrast with each other (e.g. add vs delete).
    Contrast,
    /// No relationship could be detected.
    None,
}
232
/// Phrasing used when a list is rendered into prose.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` follow the
/// declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListStyle {
    /// "including A, B, and C among others"
    Including,
    /// "such as A, B, and C"
    SuchAs,
    /// "— notably A, B, and C, plus N others"
    Dash,
    /// "[A, B, and C, and N more]" (the original format)
    Bracketed,
    /// "A, B, and C, among others" — postfix qualifier; the remainder count
    /// is dropped.
    AmongOthers,
    /// "A, B, and C, to name a few" — postfix qualifier; the remainder count
    /// is dropped.
    ToNameAFew,
    /// "A, B, and C, plus N more" — postfix qualifier that keeps the
    /// remainder count.
    PlusMore,
}
252
/// Length of the recent-connective window (the ring buffer of emitted
/// connectives holds at most this many entries).
const CONNECTIVE_WINDOW: usize = 6;

/// Length of the sliding window the connector-family budget looks at. A
/// family may contribute at most `pool.len()` emissions inside the window;
/// after that `select_connective` returns `None` and the follow-on sentence
/// renders plain. The size leaves two or three null slots after a fully
/// saturated pool, which is what breaks up a `Similarly,/Likewise,` style
/// alternation in service-shape prose.
const FAMILY_WINDOW: usize = 5;

/// Score deducted from a candidate that would turn the previous two
/// emissions into an A/B/A alternation. Unused candidates score a distance
/// of `CONNECTIVE_WINDOW + 1`, so this penalty is big enough to push a
/// recently seen alternation partner below any unused option, yet small
/// enough to leave the LRU recycle order (A,B,C → A,B,C) untouched when the
/// pool has a third choice.
const ALTERNATION_PENALTY: i64 = 2;

/// Number of recent renders whose words are kept for repetition scoring.
const WORD_HISTORY_WINDOW: usize = 5;
/// Window size for sentence-rhythm scoring.
/// NOTE(review): presumably the cap on the sentence-length history; the
/// code that applies it is not shown in this part of the file.
const SENTENCE_RHYTHM_WINDOW: usize = 6;
/// Once an entity has gone unmentioned for this many renders, it is
/// reintroduced with its full form.
const ENTITY_REINTRODUCE_DISTANCE: usize = 3;
275
/// Penalty charged per sentence when consecutive sentences fall on the same
/// side of the running mean length. Kept small next to the closeness
/// (max 3.0) and mean-delta (max 1.0) terms so it only breaks cadence ties
/// and never dominates the rhythm score.
const SAME_SIDE_PENALTY: f64 = 0.75;

/// Distance from the mean, in words, under which a sentence counts as "at
/// the mean" and gives no same-side signal. Prevents spurious pivots when a
/// length sits on, or a fraction of a word beside, the mean.
const SIDE_OF_MEAN_NEUTRAL_BAND: f64 = 0.5;

/// Side of the running mean sentence length on which a sentence falls.
#[derive(Copy, Clone, PartialEq, Eq)]
enum CadenceSide {
    /// Longer than the mean by at least the neutral band.
    Above,
    /// Not above the mean and outside the neutral band.
    Below,
}

/// Classify `len` against `mean`.
///
/// Returns `None` when the two differ by less than
/// `SIDE_OF_MEAN_NEUTRAL_BAND`, `Some(CadenceSide::Above)` when `len` is
/// larger, and `Some(CadenceSide::Below)` in every remaining case (which
/// includes a NaN difference, since both comparisons are then false).
fn side_of_mean(len: f64, mean: f64) -> Option<CadenceSide> {
    match len - mean {
        offset if offset.abs() < SIDE_OF_MEAN_NEUTRAL_BAND => None,
        offset if offset > 0.0 => Some(CadenceSide::Above),
        _ => Some(CadenceSide::Below),
    }
}
303
/// Words left out of the word-frequency map.
///
/// Element order is kept stable on purpose: the list is interned front to
/// back at construction time, so the position of a word determines its id.
#[rustfmt::skip]
const STOPWORDS: &[&str] = &[
    // Articles and coordinating conjunctions.
    "a", "an", "the", "and", "or", "but",
    // Prepositions.
    "in", "on", "at", "to", "for", "of", "with", "by", "from",
    // Forms of "be", "have", and "do".
    "is", "was", "are", "were", "be", "been", "being",
    "have", "has", "had",
    "do", "does", "did",
    // Modal auxiliaries.
    "will", "would", "could", "should", "may", "might", "shall", "can",
    // Negation.
    "not", "no",
    // Pronouns and demonstratives.
    "it", "its", "this", "that", "these", "those",
    // Interrogatives and relativizers.
    "which", "who", "what", "where", "when", "how",
    // Subordinators, particles, and degree adverbs.
    "if", "then", "than", "so", "as", "up", "out", "into", "also", "just", "more", "most",
];
312
313const LIST_STYLES: &[ListStyle] = &[
314 ListStyle::Including,
315 ListStyle::SuchAs,
316 ListStyle::Dash,
317 ListStyle::Bracketed,
318 ListStyle::AmongOthers,
319 ListStyle::ToNameAFew,
320 ListStyle::PlusMore,
321];
322
/// How many recent list-style picks are remembered for anti-repeat.
/// [`DiscourseState::next_list_style`] skips any style found in this
/// trailing window, and explicit recordings made through
/// [`DiscourseState::record_list_style_used`] land in it as well, so
/// back-to-back truncated lists never reuse a phrasing even when a forced
/// style would otherwise collide with the automatic cycle.
const LIST_STYLE_RECENT_WINDOW: usize = 2;
329
/// Connectives for "same entity, different action" (continuation family).
const SAME_ENTITY_CONNECTIVES: &[&str] = &["Additionally,", "Furthermore,", "It also"];

/// Connectives for "different entity, same action" (similarity family).
const SAME_ACTION_CONNECTIVES: &[&str] = &["Similarly,", "Likewise,"];

/// Connectives for contrasting actions (contrast family).
const CONTRAST_CONNECTIVES: &[&str] = &["Meanwhile,", "However,", "On the other hand,"];
336
337/// Number of distinct list styles in the cycle.
338pub(crate) fn list_styles_count() -> usize {
339 LIST_STYLES.len()
340}
341
/// Lexical family of a connector. Exact-string anti-repeat can only compare
/// individual connectors; grouping them into families lets the budget reason
/// about whole categories (continuation / similarity / contrast), so a
/// two-element pool cannot trap the prose in an A/B/A/B alternation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConnectorFamily {
    /// Continuation or expansion: "Additionally,", "Furthermore,", "It also".
    Continuation,
    /// Similarity: "Similarly,", "Likewise,".
    Similarity,
    /// Contrast: "Meanwhile,", "However,", "On the other hand,".
    Contrast,
}
355
356fn family_for_relation(relation: &DiscourseRelation) -> Option<ConnectorFamily> {
357 match relation {
358 DiscourseRelation::SameEntityDifferentAction => Some(ConnectorFamily::Continuation),
359 DiscourseRelation::DifferentEntitySameAction => Some(ConnectorFamily::Similarity),
360 DiscourseRelation::Contrast => Some(ConnectorFamily::Contrast),
361 DiscourseRelation::None => None,
362 }
363}
364
365/// Map a connective string to its lexical family by membership in the
366/// per-relation pools. Returns `None` for strings outside the known set
367/// (e.g. discourse markers from `Default-Language`); those don't count
368/// toward the family-budget gate, matching the engine's accounting.
369fn family_for_connective(connective: &str) -> Option<ConnectorFamily> {
370 if SAME_ENTITY_CONNECTIVES.contains(&connective) {
371 Some(ConnectorFamily::Continuation)
372 } else if SAME_ACTION_CONNECTIVES.contains(&connective) {
373 Some(ConnectorFamily::Similarity)
374 } else if CONTRAST_CONNECTIVES.contains(&connective) {
375 Some(ConnectorFamily::Contrast)
376 } else {
377 None
378 }
379}
380
381impl DiscourseState {
382 pub fn new() -> Self {
383 let mut interner = WordInterner::default();
384 // Pre-intern all stopwords so membership checks are O(1) u32 lookups.
385 let stopword_ids: HashSet<u32> = STOPWORDS.iter().map(|&w| interner.intern(w)).collect();
386
387 Self {
388 entities: new_map(),
389 render_index: 0,
390 focus_entity: None,
391 template_history: new_map(),
392 connective_history: VecDeque::new(),
393 connective_family_history: VecDeque::new(),
394 last_template_key: None,
395 last_entity_name: None,
396 word_history: VecDeque::new(),
397 sentence_length_history: VecDeque::new(),
398 interner,
399 stopword_ids,
400 last_list_style: 0,
401 recent_list_styles: VecDeque::with_capacity(LIST_STYLE_RECENT_WINDOW),
402 last_list_style_used: None,
403 last_cleanup_stripped_tail: false,
404 focus_is_plural: false,
405 cb: None,
406 previous_focus: None,
407 current_cf: Vec::new(),
408 previous_cf: Vec::new(),
409 last_transition: Transition::NoCb,
410 }
411 }
412
413 /// Mark the current focus as a compound/plural subject so the next
414 /// pronoun reference uses "they" rather than "it".
415 pub fn set_focus_plural(&mut self, plural: bool) {
416 self.focus_is_plural = plural;
417 }
418
419 /// Whether the current focus is a plural/compound subject.
420 pub fn focus_is_plural(&self) -> bool {
421 self.focus_is_plural
422 }
423
424 /// Clear ALL discourse state, including the cross-paragraph list-style
425 /// cycle counter. Use when starting a fully unrelated narrative — most
426 /// callers want [`Self::reset_for_paragraph`] instead so consecutive
427 /// paragraphs continue to rotate list-style phrasings.
428 pub fn reset(&mut self) {
429 *self = Self::new();
430 }
431
432 /// Clear discourse state at a paragraph boundary while preserving the
433 /// narrative-level stylistic anti-repeat machinery. This is the reset
434 /// used by [`Session::reset_for_paragraph`] so multi-paragraph narratives
435 /// don't restart variant cycles, list-style rotation, word-repetition
436 /// penalties, or sentence-rhythm memory on every paragraph break.
437 ///
438 /// **Preserved (narrative-level):** `last_list_style` and
439 /// `recent_list_styles` (list-style cycle plus anti-repeat window),
440 /// `template_history` (variant anti-repeat), `connective_history`
441 /// (connective anti-repeat), `word_history` plus `interner` (repetition
442 /// scoring), `sentence_length_history` (cadence/rhythm scoring),
443 /// `render_index` (so word-history distances stay correct and
444 /// `has_prior_render` keeps reporting earlier discourse exists).
445 ///
446 /// **Cleared (paragraph-local):** the entity table, focus entity and its
447 /// plurality, `last_template_key`/`last_entity_name` (so cross-paragraph
448 /// relation/connective inference is suppressed), the Centering Theory
449 /// `Cb`/`Cf` machinery (`cb`, `previous_focus`, `current_cf`,
450 /// `previous_cf`, `last_transition`), and per-render diagnostic signals.
451 ///
452 /// The clearance set is the load-bearing invariant: anaphora must not
453 /// resolve to entities introduced in an earlier paragraph, and rhetorical
454 /// connectives ("Furthermore,", "However,") must not jump paragraph
455 /// boundaries.
456 pub fn reset_for_paragraph(&mut self) {
457 // Pronoun/anaphora sources.
458 self.entities.clear();
459 self.focus_entity = None;
460 self.focus_is_plural = false;
461 // Relation-detection inputs (drive cross-render connective insertion).
462 self.last_template_key = None;
463 self.last_entity_name = None;
464 // Centering Theory state.
465 self.cb = None;
466 self.previous_focus = None;
467 self.current_cf.clear();
468 self.previous_cf.clear();
469 self.last_transition = Transition::NoCb;
470 // Per-render diagnostics.
471 self.last_list_style_used = None;
472 self.last_cleanup_stripped_tail = false;
473 // Intentionally retained: last_list_style, recent_list_styles,
474 // template_history, connective_history,
475 // connective_family_history, word_history,
476 // sentence_length_history, interner, stopword_ids, render_index.
477 }
478
479 /// Clear only the list-style cycle counter and its anti-repeat window.
480 /// Mirrors [`Session::reset_temporal`] for callers that want to start a
481 /// fresh list-style rotation without otherwise resetting discourse state.
482 pub fn reset_list_cycle(&mut self) {
483 self.last_list_style = 0;
484 self.recent_list_styles.clear();
485 }
486
487 /// Advance to the next render. Must be called at the start of each render.
488 pub fn begin_render(&mut self) {
489 self.render_index += 1;
490 self.current_cf.clear();
491 // Reset per-render diagnostic signals so `RenderExplanation`
492 // always reports the value for THIS render rather than inheriting
493 // state from a previous one.
494 self.last_list_style_used = None;
495 self.last_cleanup_stripped_tail = false;
496 }
497
498 /// Record that an entity was mentioned in the current render at rank 0
499 /// (Subject position). Delegates to [`Self::mention_entity_ranked`].
500 ///
501 /// Resets the focus-plural flag — compound subjects must mark
502 /// themselves explicitly via [`Self::set_focus_plural`].
503 pub fn mention_entity(&mut self, name: &str, entity_type: &str) {
504 self.mention_entity_ranked(name, entity_type, 0);
505 }
506
507 /// Record that an entity was mentioned in the current render with an
508 /// explicit grammatical-role rank. Lower rank = more prominent.
509 ///
510 /// Rank convention:
511 /// - 0: Subject (most prominent — the Cp candidate)
512 /// - 1: Direct Object
513 /// - 2: Indirect Object / Location
514 /// - 3+: Oblique / other
515 ///
516 /// The entity is inserted into `current_cf` in rank-ascending order.
517 /// If the entity is already in the Cf list, the lower of the two ranks
518 /// is kept (a subject mention always beats an object mention).
519 ///
520 /// `focus_entity` is updated when rank == 0 or when no focus has been
521 /// set yet for this render; this keeps the Cp semantics: the Subject is
522 /// the preferred center.
523 pub fn mention_entity_ranked(&mut self, name: &str, entity_type: &str, rank: u8) {
524 let entry = self
525 .entities
526 .entry(name.to_string())
527 .or_insert(EntityMention {
528 entity_type: entity_type.to_string(),
529 last_mentioned: 0,
530 mention_count: 0,
531 });
532 entry.last_mentioned = self.render_index;
533 entry.mention_count += 1;
534 entry.entity_type = entity_type.to_string();
535
536 // Update focus_entity (= Cp) when this is the most prominent slot
537 // (rank 0) or when no focus has been established yet this render.
538 if rank == 0 || self.focus_entity.is_none() {
539 self.focus_entity = Some(name.to_string());
540 self.last_entity_name = Some(name.to_string());
541 self.focus_is_plural = false;
542 }
543
544 // Insert into current_cf, deduplicating by name (keep lower rank).
545 if let Some(existing) = self.current_cf.iter_mut().find(|c| c.name == name) {
546 if rank < existing.rank {
547 existing.rank = rank;
548 // Re-sort after rank update.
549 self.current_cf.sort_by_key(|c| c.rank);
550 }
551 } else {
552 self.current_cf.push(Cf {
553 name: name.to_string(),
554 rank,
555 });
556 // Sort stably so Cp = first element.
557 self.current_cf.sort_by_key(|c| c.rank);
558 }
559 }
560
561 /// Profile-aware variant of [`Self::reference_form`].
562 ///
563 /// `PronounDensity::Default` is identical to `reference_form`. `Low`
564 /// demotes any computed `Pronoun` to `ShortName`, biasing toward
565 /// formal register that keeps full names visible longer. `High`
566 /// promotes a `ShortName` to `Pronoun` when the entity is recent
567 /// enough (distance ≤ 2) and not in an ambiguity context — biasing
568 /// toward conversational register.
569 pub fn reference_form_with_density(
570 &self,
571 name: &str,
572 density_low: bool,
573 density_high: bool,
574 ) -> ReferenceForm {
575 let raw = self.reference_form(name);
576 if density_low {
577 return match raw {
578 ReferenceForm::Pronoun => ReferenceForm::ShortName,
579 other => other,
580 };
581 }
582 if density_high && raw == ReferenceForm::ShortName && self.is_pronoun_eligible_relaxed(name)
583 {
584 return ReferenceForm::Pronoun;
585 }
586 raw
587 }
588
589 fn is_pronoun_eligible_relaxed(&self, name: &str) -> bool {
590 let Some(mention) = self.entities.get(name) else {
591 return false;
592 };
593 let distance = self.render_index.saturating_sub(mention.last_mentioned);
594 if distance == 0 || distance > 2 {
595 return false;
596 }
597 if self.has_ambiguity(name) {
598 return false;
599 }
600 true
601 }
602
603 /// Determine how to refer to an entity given discourse history.
604 pub fn reference_form(&self, name: &str) -> ReferenceForm {
605 let mention = match self.entities.get(name) {
606 Some(m) => m,
607 None => return ReferenceForm::Full,
608 };
609
610 let distance = self.render_index.saturating_sub(mention.last_mentioned);
611
612 // If it's been too long, reintroduce with full form.
613 if distance >= ENTITY_REINTRODUCE_DISTANCE {
614 return ReferenceForm::Full;
615 }
616
617 // Candidate for pronoun under existing distance/focus/ambiguity rules.
618 let pronoun_candidate = distance == 1
619 && self.focus_entity.as_deref() == Some(name)
620 && !self.has_ambiguity(name);
621
622 if pronoun_candidate {
623 // Centering Theory Rule 1 gate:
624 // If any element of Cf(Ui) is realized as a pronoun in Ui+1,
625 // then the Cb(Ui+1) must also be realized as a pronoun.
626 //
627 // Practically: only pronominalize when the referent IS the Cb, or
628 // when there is no Cb yet (fresh discourse / post-reset / first
629 // named entity). If the Cb is a *different* entity, demoting to
630 // ShortName avoids an ambiguous pronoun resolution.
631 match self.cb.as_deref() {
632 // No Cb yet (first render or post-reset) — fall through to pronoun.
633 None => return ReferenceForm::Pronoun,
634 // Referent IS the Cb — Rule 1 permits pronominalization.
635 Some(cb_name) if cb_name == name => return ReferenceForm::Pronoun,
636 // Referent is NOT the Cb — Rule 1 demotes to ShortName to
637 // prevent an ambiguous pronoun whose referent is the Cb entity.
638 Some(_) => return ReferenceForm::ShortName,
639 }
640 }
641
642 // Short name for entities mentioned recently but not pronoun-eligible.
643 if distance > 0 && distance < ENTITY_REINTRODUCE_DISTANCE {
644 return ReferenceForm::ShortName;
645 }
646
647 ReferenceForm::Full
648 }
649
650 /// Check if there are multiple recently-mentioned entities that could cause
651 /// ambiguity when using a pronoun.
652 fn has_ambiguity(&self, name: &str) -> bool {
653 let recent_count = self
654 .entities
655 .iter()
656 .filter(|(n, m)| {
657 n.as_str() != name && self.render_index.saturating_sub(m.last_mentioned) <= 2
658 })
659 .count();
660 recent_count > 0
661 }
662
663 /// Record which template variant was selected for anti-repeat.
664 pub fn record_template_choice(&mut self, key: &str, variant_index: usize) {
665 self.template_history.insert(key.to_string(), variant_index);
666 self.last_template_key = Some(key.to_string());
667 }
668
669 /// Get the last variant index used for a key (to avoid repeating it).
670 pub fn last_template_variant(&self, key: &str) -> Option<usize> {
671 self.template_history.get(key).copied()
672 }
673
674 /// Detect the relationship between the current render and the previous one.
675 ///
676 /// Both entities must be present (and comparable) to assert a "same
677 /// entity" or "different entity" relationship — otherwise the engine
678 /// would incorrectly emit e.g. a *Similarly,* connective for a
679 /// repeated entity-less template, where no entity comparison is
680 /// actually meaningful.
681 pub fn detect_relation(
682 &self,
683 current_key: &str,
684 current_entity: Option<&str>,
685 ) -> DiscourseRelation {
686 let last_key = match &self.last_template_key {
687 Some(k) => k.as_str(),
688 None => return DiscourseRelation::None,
689 };
690
691 let last_entity = self.last_entity_name.as_deref();
692 let both_have_entities = current_entity.is_some() && last_entity.is_some();
693 let same_entity = both_have_entities && current_entity == last_entity;
694 let different_entity = both_have_entities && current_entity != last_entity;
695
696 let same_action = keys_share_action(current_key, last_key);
697 let contrasting = keys_contrast(current_key, last_key);
698
699 if same_entity && !same_action {
700 DiscourseRelation::SameEntityDifferentAction
701 } else if different_entity && same_action {
702 DiscourseRelation::DifferentEntitySameAction
703 } else if contrasting && both_have_entities {
704 DiscourseRelation::Contrast
705 } else {
706 DiscourseRelation::None
707 }
708 }
709
710 /// Select a discourse connective for the given relation, preferring
711 /// candidates absent from recent history. Three deterministic
712 /// guardrails layer on top of the LRU pick:
713 ///
714 /// 1. **Connector-family budget.** Each pool maps to a lexical family
715 /// (continuation, similarity, contrast). When the family already
716 /// contributes `pool.len()` emissions inside the trailing
717 /// `FAMILY_WINDOW`, return `None` so the next sentence renders
718 /// plain. This is the lever that breaks the
719 /// `Similarly,/Likewise,/Similarly,/Likewise,` pattern Matt flagged
720 /// in service-shape prose: the two-element similarity pool is
721 /// forced to alternate after two emissions, so the third call
722 /// drops the connective entirely.
723 /// 2. **Exact-connector cooldown.** The immediately preceding
724 /// connective is excluded from candidacy when the pool offers an
725 /// alternative — preserves the existing back-to-back anti-repeat.
726 /// 3. **A/B alternation penalty.** Candidates equal to
727 /// `connective_history[len-2]` take a score deduction so the LRU
728 /// pick will not extend an A/B pattern into A/B/A when a fresh
729 /// option exists. For three-element pools this preserves the
730 /// A,B,C cycle; for two-element pools the family budget kicks in
731 /// first and the penalty is moot.
732 pub fn select_connective(&mut self, relation: &DiscourseRelation) -> Option<&'static str> {
733 self.select_connective_filtered(relation, None, None, None)
734 }
735
736 /// Profile-aware variant of [`Self::select_connective`].
737 ///
738 /// `allowed` (when `Some`) restricts the candidate pool to connectives
739 /// also present in the slice. If the resulting pool is empty (every
740 /// allowed entry was filtered by the existing anti-repeat or family
741 /// budget logic, OR no allowed entries match the base pool at all),
742 /// the engine falls back to the unfiltered base pool — profile
743 /// preferences are biases, never hard constraints.
744 ///
745 /// `preferred` (when `Some`) adds a per-connective tie-breaker bonus
746 /// to the existing distance/alternation score. Weights are interpreted
747 /// in `0.0..=1.0` and scaled by 10 to land in the same rough magnitude
748 /// as the existing scoring terms.
749 ///
750 /// `forbidden` (when `Some`) is a strict subtractive filter applied
751 /// *after* the allowed/fallback computation — used by the
752 /// retrospective refine pass for `BlacklistConnective` constraints.
753 /// Unlike `allowed`, an empty post-`forbidden` pool emits `None`
754 /// rather than falling back: that's the whole point of a blacklist.
755 pub fn select_connective_filtered(
756 &mut self,
757 relation: &DiscourseRelation,
758 allowed: Option<&[&str]>,
759 preferred: Option<&[(&str, f32)]>,
760 forbidden: Option<&[&str]>,
761 ) -> Option<&'static str> {
762 let base_pool: &[&'static str] = match relation {
763 DiscourseRelation::SameEntityDifferentAction => SAME_ENTITY_CONNECTIVES,
764 DiscourseRelation::DifferentEntitySameAction => SAME_ACTION_CONNECTIVES,
765 DiscourseRelation::Contrast => CONTRAST_CONNECTIVES,
766 DiscourseRelation::None => return None,
767 };
768 let family = family_for_relation(relation)
769 .expect("non-None relation always maps to a connector family");
770
771 // Apply the profile-allowed filter when one is supplied. An empty
772 // post-filter pool falls through to the base pool — profile
773 // preferences are biases, not hard constraints.
774 let filtered: Option<Vec<&'static str>> = allowed.map(|allow| {
775 base_pool
776 .iter()
777 .copied()
778 .filter(|c| allow.contains(c))
779 .collect()
780 });
781 let after_allowed: &[&'static str] = match &filtered {
782 Some(v) if !v.is_empty() => v.as_slice(),
783 _ => base_pool,
784 };
785
786 // Apply the strict-forbidden filter (refine-pass blacklist) on
787 // top of `after_allowed`. Empty post-forbidden pool → no
788 // connective emitted (None). This is the intentional asymmetry
789 // with `allowed`: blacklist is a hard constraint.
790 let strictly_filtered: Option<Vec<&'static str>> = forbidden.map(|forbid| {
791 after_allowed
792 .iter()
793 .copied()
794 .filter(|c| !forbid.contains(c))
795 .collect()
796 });
797 let pool_owned: Vec<&'static str>;
798 let pool: &[&'static str] = match &strictly_filtered {
799 Some(v) => {
800 if v.is_empty() {
801 self.record_family_slot(None);
802 return None;
803 }
804 pool_owned = v.clone();
805 pool_owned.as_slice()
806 }
807 None => after_allowed,
808 };
809
810 // Family-budget gate: count this family's emissions inside the
811 // trailing window. Once they saturate the (effective) pool,
812 // suppress the connective so the prose continues without a
813 // transition cue.
814 let family_count = self
815 .connective_family_history
816 .iter()
817 .rev()
818 .take(FAMILY_WINDOW)
819 .filter(|slot| **slot == Some(family))
820 .count();
821 if family_count >= pool.len() {
822 self.record_family_slot(None);
823 return None;
824 }
825
826 let immediate = self.connective_history.back().map(String::as_str);
827 let two_back = self
828 .connective_history
829 .iter()
830 .rev()
831 .nth(1)
832 .map(String::as_str);
833
834 let prefer_bonus = |connective: &str| -> i64 {
835 let Some(prefs) = preferred else {
836 return 0;
837 };
838 prefs
839 .iter()
840 .find_map(|(s, w)| if *s == connective { Some(*w) } else { None })
841 .map(|w| (w * 10.0) as i64)
842 .unwrap_or(0)
843 };
844
845 let mut selected: Option<&'static str> = None;
846 let mut selected_score: i64 = i64::MIN;
847
848 for &connective in pool {
849 if pool.len() > 1 && immediate == Some(connective) {
850 continue;
851 }
852
853 let distance = self
854 .connective_history
855 .iter()
856 .rev()
857 .position(|history| history == connective)
858 .unwrap_or(CONNECTIVE_WINDOW + 1) as i64;
859
860 let alternation_penalty = if pool.len() > 1 && two_back == Some(connective) {
861 ALTERNATION_PENALTY
862 } else {
863 0
864 };
865 let score = distance - alternation_penalty + prefer_bonus(connective);
866
867 if selected.is_none() || score > selected_score {
868 selected = Some(connective);
869 selected_score = score;
870 }
871 }
872
873 let connective = selected?;
874 self.connective_history.push_back(connective.to_string());
875 if self.connective_history.len() > CONNECTIVE_WINDOW {
876 self.connective_history.pop_front();
877 }
878 self.record_family_slot(Some(family));
879
880 Some(connective)
881 }
882
883 /// Push a per-decision family slot, capping the ring buffer at
884 /// `FAMILY_WINDOW + 2` so the budget check has the full window plus
885 /// a small lookahead margin without growing without bound.
886 fn record_family_slot(&mut self, slot: Option<ConnectorFamily>) {
887 self.connective_family_history.push_back(slot);
888 if self.connective_family_history.len() > FAMILY_WINDOW + 2 {
889 self.connective_family_history.pop_front();
890 }
891 }
892
893 /// Record the words from a rendered output for repetition scoring.
894 pub fn record_output_words(&mut self, output: &str) {
895 let mut ids: HashSet<u32> = new_set();
896 for raw in output.split_whitespace() {
897 let w = raw
898 .trim_matches(|c: char| !c.is_alphanumeric())
899 .to_lowercase();
900 if w.len() <= 2 {
901 continue;
902 }
903 let id = self.interner.intern(&w);
904 if self.stopword_ids.contains(&id) {
905 continue;
906 }
907 ids.insert(id);
908 }
909
910 self.word_history.push_back((self.render_index, ids));
911
912 // Trim to window
913 while self.word_history.len() > WORD_HISTORY_WINDOW {
914 self.word_history.pop_front();
915 }
916 }
917
    /// Iterate over the recent sentence-length history (newest last).
    /// Each value is the word count of one emitted sentence inside the
    /// rhythm-tracking window. Exposed for profile-aware scorers that
    /// need to read the cadence buffer without snapshotting the whole
    /// session — the buffer is short and read-only from outside.
    ///
    /// The returned iterator borrows `self` for its lifetime and yields
    /// the stored counts by value, in the buffer's front-to-back order.
    pub fn sentence_length_iter(&self) -> impl Iterator<Item = usize> + '_ {
        // `copied()` turns the `&usize` items into plain `usize` values.
        self.sentence_length_history.iter().copied()
    }
926
927 /// Record word counts for the sentences emitted by the committed render.
928 pub fn record_sentence_rhythm(&mut self, output: &str) {
929 for len in sentence_word_counts(output) {
930 self.sentence_length_history.push_back(len);
931 while self.sentence_length_history.len() > SENTENCE_RHYTHM_WINDOW {
932 self.sentence_length_history.pop_front();
933 }
934 }
935 }
936
937 /// Score a candidate output for repetition against recent history.
938 /// Lower score = less repetition = better.
939 pub fn repetition_score(&self, candidate: &str) -> f64 {
940 // Collect candidate word ids; new words may not be in the interner
941 // yet, so use `get` (read-only) and skip unknowns — they have no
942 // history so they contribute zero to the score.
943 let candidate_ids: HashSet<u32> = candidate
944 .split_whitespace()
945 .filter_map(|raw| {
946 let w = raw
947 .trim_matches(|c: char| !c.is_alphanumeric())
948 .to_lowercase();
949 if w.len() <= 2 {
950 return None;
951 }
952 let id = self.interner.get(&w)?;
953 if self.stopword_ids.contains(&id) {
954 return None;
955 }
956 Some(id)
957 })
958 .collect();
959
960 let mut score = 0.0;
961 for (idx, ids) in &self.word_history {
962 let distance = self.render_index.saturating_sub(*idx);
963 let overlap = candidate_ids.intersection(ids).count();
964 // Closer renders penalized more heavily
965 let weight = match distance {
966 0 | 1 => 3.0,
967 2 => 2.0,
968 3 => 1.0,
969 _ => 0.5,
970 };
971 score += overlap as f64 * weight;
972 }
973 score
974 }
975
976 /// Score a candidate output against recent sentence-length cadence.
977 /// Lower is better: candidates with sentence lengths that were just
978 /// emitted receive a penalty, while noticeably shorter or longer variants
979 /// are preferred when repetition scores are otherwise close.
980 ///
981 /// In addition to the per-sentence closeness/mean components, a bounded
982 /// same-side penalty fires for each consecutive sentence pair (history
983 /// → candidate, then candidate → candidate) that lands on the same side
984 /// of the running mean. This nudges the selector toward burst-pivot
985 /// cadence — alternating short/long around the mean — which is a hallmark
986 /// of natural prose. The penalty is purely additive and capped per
987 /// sentence so it cannot zero out repetition penalties or push the score
988 /// negative.
989 pub fn sentence_rhythm_score(&self, candidate: &str) -> f64 {
990 let candidate_lengths = sentence_word_counts(candidate);
991 if candidate_lengths.is_empty() || self.sentence_length_history.is_empty() {
992 return 0.0;
993 }
994
995 let recent_mean = self.sentence_length_history.iter().sum::<usize>() as f64
996 / self.sentence_length_history.len() as f64;
997
998 // Side of mean for the most recent emitted sentence, if any. Sentences
999 // exactly at the mean are treated as neutral (None) and never trigger
1000 // a same-side penalty in either direction.
1001 let mut prev_side = self
1002 .sentence_length_history
1003 .back()
1004 .and_then(|len| side_of_mean(*len as f64, recent_mean));
1005
1006 let mut score = 0.0;
1007 for len in &candidate_lengths {
1008 let closest = self
1009 .sentence_length_history
1010 .iter()
1011 .map(|recent| recent.abs_diff(*len))
1012 .min()
1013 .unwrap_or(usize::MAX);
1014
1015 score += match closest {
1016 0 => 3.0,
1017 1 => 2.0,
1018 2 => 1.0,
1019 3 => 0.5,
1020 _ => 0.0,
1021 };
1022
1023 let mean_delta = (*len as f64 - recent_mean).abs();
1024 if mean_delta < 1.0 {
1025 score += 1.0;
1026 } else if mean_delta < 2.0 {
1027 score += 0.5;
1028 }
1029
1030 let cur_side = side_of_mean(*len as f64, recent_mean);
1031 if let (Some(prev), Some(cur)) = (prev_side, cur_side)
1032 && prev == cur
1033 {
1034 score += SAME_SIDE_PENALTY;
1035 }
1036 // Carry candidate side forward so within-candidate runs (e.g.
1037 // long → long → long) accumulate the penalty across each pair,
1038 // not just against history.
1039 if cur_side.is_some() {
1040 prev_side = cur_side;
1041 }
1042 }
1043
1044 score / candidate_lengths.len() as f64
1045 }
1046
1047 /// Recency-weighted frequency of a specific word in recent output.
1048 /// Higher numbers mean the word has appeared recently and/or often.
1049 /// Used to pick the least-recently-used synonym from a registered
1050 /// group for elegant variation.
1051 pub fn word_frequency(&self, word: &str) -> f64 {
1052 let lower = word.to_lowercase();
1053 // Word must already be interned; if it has never appeared in history
1054 // its frequency is zero by definition.
1055 let id = match self.interner.get(&lower) {
1056 Some(id) => id,
1057 None => return 0.0,
1058 };
1059 let mut score = 0.0;
1060 for (idx, ids) in &self.word_history {
1061 if !ids.contains(&id) {
1062 continue;
1063 }
1064 let distance = self.render_index.saturating_sub(*idx);
1065 let weight = match distance {
1066 0 | 1 => 3.0,
1067 2 => 2.0,
1068 3 => 1.0,
1069 _ => 0.5,
1070 };
1071 score += weight;
1072 }
1073 score
1074 }
1075
    /// Select the next list style. Walks `LIST_STYLES` deterministically from
    /// `last_list_style` forward and returns the first style that is not in
    /// the recent-window (`recent_list_styles`). The walk advances past the
    /// chosen slot so subsequent calls progress through the palette rather
    /// than locking onto the first non-recent slot.
    ///
    /// Anti-repeat is fully deterministic — no RNG dependency — and ensures
    /// that an explicit forced style (e.g. `{|join:bracketed}` recorded via
    /// [`Self::record_list_style_used`]) does not collide with the very next
    /// auto-cycle pick. Falls back to the modulo slot if every style somehow
    /// sits in the recent window (unreachable while
    /// `LIST_STYLE_RECENT_WINDOW < LIST_STYLES.len()`, but kept defensive).
    ///
    /// This is the unbiased entry point: it calls
    /// [`Self::next_list_style_with_bias`] with `None`.
    pub fn next_list_style(&mut self) -> ListStyle {
        // No bias target, so the natural cycle always decides the pick.
        self.next_list_style_with_bias(None)
    }
1091
1092 /// Profile-aware variant of [`Self::next_list_style`].
1093 ///
1094 /// When `bias` is `Some(target)` and `target` is not currently inside
1095 /// the anti-repeat window, the cycle advances to the slot just past
1096 /// `target` and emits it. When `bias` is `None` (i.e., the profile's
1097 /// `ListStyleBias::Auto` default), or when the bias target is in the
1098 /// recent window, the natural cycle picks as in `next_list_style`.
1099 /// The bias is a preference, not an override — anti-repeat always wins.
1100 pub fn next_list_style_with_bias(&mut self, bias: Option<ListStyle>) -> ListStyle {
1101 if let Some(target) = bias
1102 && !self.recent_list_styles.contains(&target)
1103 && let Some(target_idx) = LIST_STYLES.iter().position(|s| *s == target)
1104 {
1105 // Advance the cycle to the slot just past the bias target so
1106 // the natural rotation continues coherently afterward, then
1107 // emit the target.
1108 self.last_list_style = target_idx.wrapping_add(1);
1109 self.push_recent_list_style(target);
1110 self.last_list_style_used = Some(target);
1111 return target;
1112 }
1113
1114 let len = LIST_STYLES.len();
1115 let start = self.last_list_style % len;
1116
1117 let mut chosen_offset = 0;
1118 for offset in 0..len {
1119 let candidate = LIST_STYLES[(start + offset) % len];
1120 if !self.recent_list_styles.contains(&candidate) {
1121 chosen_offset = offset;
1122 break;
1123 }
1124 }
1125
1126 let style = LIST_STYLES[(start + chosen_offset) % len];
1127 // Advance past the picked slot so the cycle continues to make
1128 // forward progress rather than re-evaluating from the same start
1129 // on the next call.
1130 self.last_list_style = self.last_list_style.wrapping_add(chosen_offset + 1);
1131 self.push_recent_list_style(style);
1132 self.last_list_style_used = Some(style);
1133 style
1134 }
1135
    /// Record an explicit list style (e.g. `{|join:bracketed}`) for
    /// diagnostics AND anti-repeat. Forced styles count toward the recent
    /// window so a subsequent auto-cycle pick won't immediately repeat the
    /// forced phrasing.
    ///
    /// The cycle cursor (`last_list_style`) is not touched here; only the
    /// recent window and the last-used marker change.
    pub fn record_list_style_used(&mut self, style: ListStyle) {
        // Anti-repeat: make the forced style part of the recent window.
        self.push_recent_list_style(style);
        // Diagnostics: expose it through `last_list_style_used()`.
        self.last_list_style_used = Some(style);
    }
1144
1145 fn push_recent_list_style(&mut self, style: ListStyle) {
1146 // Drop duplicates of `style` already in the window before we push,
1147 // so the trailing slot is always "the most recent N *distinct*
1148 // styles" rather than the same forced style filling the buffer.
1149 self.recent_list_styles.retain(|&s| s != style);
1150 if self.recent_list_styles.len() == LIST_STYLE_RECENT_WINDOW {
1151 self.recent_list_styles.pop_front();
1152 }
1153 self.recent_list_styles.push_back(style);
1154 }
1155
    /// List style applied by the most recent render's `|join` pipe (if any).
    ///
    /// Returns the value last stored by [`Self::next_list_style_with_bias`]
    /// or [`Self::record_list_style_used`].
    pub fn last_list_style_used(&self) -> Option<ListStyle> {
        self.last_list_style_used
    }
1160
1161 /// Push phantom entries onto `connective_history` AND
1162 /// `connective_family_history` so the next connective selection treats
1163 /// these as recently used by both the exact-cooldown rule and the
1164 /// family-budget gate. Each connective is mapped to its lexical family
1165 /// (Continuation / Similarity / Contrast) by membership in the
1166 /// per-relation pools; unknown strings push a `None` family slot so
1167 /// the budget gate is unaffected. Pushes are bounded by the same
1168 /// window caps the live emit path uses; phantom entries decay
1169 /// naturally as new emissions arrive. Used by the retrospective
1170 /// refine pass to apply `PrimeRecencyWindow` constraints.
1171 pub(crate) fn prime_connective_history(&mut self, connectives: &[String]) {
1172 for c in connectives {
1173 self.connective_history.push_back(c.clone());
1174 if self.connective_history.len() > CONNECTIVE_WINDOW {
1175 self.connective_history.pop_front();
1176 }
1177 let family = family_for_connective(c.as_str());
1178 self.record_family_slot(family);
1179 }
1180 }
1181
1182 /// Push phantom entries onto `recent_list_styles` so the next
1183 /// auto-cycle pick treats these styles as recently used. Mirrors the
1184 /// dedup-and-cap semantics of [`Self::push_recent_list_style`]: an
1185 /// already-recent style is moved to the trailing slot rather than
1186 /// duplicated. Used by the retrospective refine pass to apply
1187 /// `PrimeRecencyWindow` constraints.
1188 pub(crate) fn prime_list_style_history(&mut self, list_styles: &[ListStyle]) {
1189 for &style in list_styles {
1190 self.push_recent_list_style(style);
1191 }
1192 }
1193
    /// Record whether Silent-mode cleanup stripped any trailing orphan words
    /// during the most recent render.
    ///
    /// Overwrites the previous flag; read it back with
    /// [`Self::last_cleanup_stripped_tail`].
    pub fn set_cleanup_stripped_tail(&mut self, stripped: bool) {
        self.last_cleanup_stripped_tail = stripped;
    }
1199
    /// Whether the most recent render's cleanup pass removed trailing
    /// orphan words (Silent strictness only). `false` in other modes.
    ///
    /// Returns the flag last stored via [`Self::set_cleanup_stripped_tail`].
    pub fn last_cleanup_stripped_tail(&self) -> bool {
        self.last_cleanup_stripped_tail
    }
1205
    /// Whether this is the first render (no prior discourse context).
    ///
    /// True while `render_index` is 0 or 1; the exact complement of
    /// [`Self::has_prior_render`].
    pub fn is_first_render(&self) -> bool {
        self.render_index <= 1
    }
1210
    /// Whether a prior render happened in this discourse scope, used by
    /// the `{noun|demonstrative}` pipe to decide between "this X" and
    /// "the X". Cleared by `reset()`.
    ///
    /// Always the negation of [`Self::is_first_render`].
    pub fn has_prior_render(&self) -> bool {
        // begin_render has already bumped render_index for the current
        // render, so strictly greater than 1 means at least one earlier
        // render contributed to discourse state.
        self.render_index > 1
    }
1220
    /// Advance Cb tracking for the next render. Call this after all mutations
    /// from the current render (`mention_entity`, `record_output_words`) have
    /// completed and the render has committed. On render failure the
    /// `Session` snapshot/restore path will roll back `cb` and `previous_focus`
    /// along with all other fields via `Clone`.
    ///
    /// Called by `Engine::render_tx` at the end of each successful render.
    pub fn advance_cb(&mut self) {
        // All the work happens in `compute_cb_transition`: it updates `cb`
        // and `last_transition`, stores the current Cp as `previous_focus`,
        // and moves `current_cf` into `previous_cf`.
        self.compute_cb_transition();
    }
1231
    /// The Centering Theory transition class from the most recent `advance_cb` call.
    /// Returns `Transition::NoCb` before any render or after a reset.
    ///
    /// The stored value is written by `compute_cb_transition` and returned
    /// here by copy.
    pub fn last_transition(&self) -> Transition {
        self.last_transition
    }
1237
    /// The current backward-looking center, if any.
    ///
    /// Borrowed as `&str` from the stored value; `None` when no Cb is set.
    pub fn cb(&self) -> Option<&str> {
        self.cb.as_deref()
    }
1242
    /// The forward-looking centers being built during the current render,
    /// ordered by rank ascending (Cp = first element).
    ///
    /// `compute_cb_transition` moves this list into the previous-Cf slot,
    /// leaving it empty until new entries are added.
    pub fn cf(&self) -> &[Cf] {
        &self.current_cf
    }
1248
    /// The forward-looking centers from the previous render.
    ///
    /// Replaced with the then-current Cf list each time
    /// `compute_cb_transition` runs.
    pub fn previous_cf(&self) -> &[Cf] {
        &self.previous_cf
    }
1253
1254 /// Compute and store the Cb for the **next** render, using Cf overlap to
1255 /// identify the backward-looking center as the highest-ranked entity in
1256 /// Cf(current) that also appeared in Cf(previous).
1257 ///
1258 /// When the pure Cf-overlap definition yields no shared entity, the method
1259 /// falls back to prior-focus logic to preserve backward compatibility with
1260 /// Rule 1 pronoun tests:
1261 ///
1262 /// - **No previous Cf** (first render / post-reset): Cb = Cp of current.
1263 /// - **No overlap, new entity first time**: prior focus stays as Cb
1264 /// (Smooth Shift — introduce gently, keep prior thread alive).
1265 /// - **No overlap, entity seen before**: Cb = current Cp (Retain-style).
1266 /// - **No current entity**: Cb carries prior focus forward.
1267 fn compute_cb_transition(&mut self) {
1268 // Cp of this render = first element of current_cf (lowest rank).
1269 let current_cp: Option<String> = self.current_cf.first().map(|c| c.name.clone());
1270 let prev_cb = self.cb.clone();
1271
1272 // New Cb: highest-ranked Cf member shared with the previous Cf.
1273 let new_cb: Option<String> = self
1274 .current_cf
1275 .iter()
1276 .find(|c| self.previous_cf.iter().any(|p| p.name == c.name))
1277 .map(|c| c.name.clone());
1278
1279 // Fallback when the Cf-overlap definition yields nothing.
1280 let new_cb = match (new_cb, current_cp.clone(), self.previous_focus.clone()) {
1281 // Overlap found: use it.
1282 (Some(cb), _, _) => Some(cb),
1283
1284 // First render (no previous focus yet): Cb = Cp.
1285 (None, Some(cp), None) => Some(cp),
1286
1287 // No overlap, but there is a previous focus.
1288 (None, Some(cp), Some(_)) => {
1289 if self.entities.get(&cp).is_some_and(|m| m.mention_count > 1) {
1290 // Entity seen before: Retain — Cb shifts to newly-focused entity.
1291 Some(cp)
1292 } else {
1293 // Brand-new entity: Smooth Shift — prior focus stays as Cb.
1294 self.previous_focus.clone()
1295 }
1296 }
1297
1298 // No current entity: carry prior focus forward.
1299 (None, None, Some(p)) => Some(p),
1300 (None, None, None) => None,
1301 };
1302
1303 // Classify the transition.
1304 let transition =
1305 classify_transition(new_cb.as_deref(), prev_cb.as_deref(), current_cp.as_deref());
1306
1307 self.cb = new_cb;
1308 self.last_transition = transition;
1309
1310 // Shift state forward for the next call.
1311 self.previous_focus = current_cp;
1312 self.previous_cf = core::mem::take(&mut self.current_cf);
1313 }
1314}
1315
1316/// Classify a Centering Theory transition given the new Cb, the previous Cb,
1317/// and the Cp (preferred center) of the current utterance.
1318///
1319/// Returns `NoCb` when:
1320/// - There is no current Cb (the utterance has no realized entities), or
1321/// - There is no previous Cb (first render or post-reset — no prior discourse
1322/// context exists to classify a transition against).
1323fn classify_transition(cb: Option<&str>, prev_cb: Option<&str>, cp: Option<&str>) -> Transition {
1324 let cb = match cb {
1325 Some(c) => c,
1326 None => return Transition::NoCb,
1327 };
1328 // No prior Cb → no meaningful transition (first render or post-reset).
1329 let prev_cb = match prev_cb {
1330 Some(p) => p,
1331 None => return Transition::NoCb,
1332 };
1333 let cb_eq_prev = prev_cb == cb;
1334 let cb_eq_cp = matches!(cp, Some(c) if c == cb);
1335
1336 match (cb_eq_prev, cb_eq_cp) {
1337 (true, true) => Transition::Continue,
1338 (true, false) => Transition::Retain,
1339 (false, true) => Transition::SmoothShift,
1340 (false, false) => Transition::RoughShift,
1341 }
1342}
1343
/// `DiscourseState::default()` is the same as [`DiscourseState::new`].
impl Default for DiscourseState {
    fn default() -> Self {
        // Single source of truth for the initial state lives in `new`.
        Self::new()
    }
}
1349
/// Split `text` into sentences and return the word count of each one.
///
/// A whitespace-separated token counts as a word when it contains at least
/// one alphanumeric character. A token whose last character is `.`, `!` or
/// `?` closes the current sentence, provided that sentence already has at
/// least one word. Words left over after the final terminator form one
/// last sentence. Sentences with zero words are never reported.
pub(crate) fn sentence_word_counts(text: &str) -> Vec<usize> {
    let mut lengths = Vec::new();
    let mut words_in_sentence = 0usize;

    for token in text.split_whitespace() {
        words_in_sentence += usize::from(token.chars().any(char::is_alphanumeric));

        let closes_sentence = matches!(token.chars().next_back(), Some('.' | '!' | '?'));
        if closes_sentence && words_in_sentence > 0 {
            // Emit the finished sentence and reset the counter to zero.
            lengths.push(core::mem::take(&mut words_in_sentence));
        }
    }

    // Trailing words without a terminator still count as a sentence.
    if words_in_sentence > 0 {
        lengths.push(words_in_sentence);
    }

    lengths
}
1371
/// Check if two template keys represent the same action type.
/// e.g., "code.renamed" and "code.renamed" → true
/// e.g., "code.renamed" and "code.deleted" → false
///
/// The whole key is compared with exact, case-sensitive string equality;
/// no segment of the key is parsed out.
fn keys_share_action(a: &str, b: &str) -> bool {
    a == b
}
1378
/// Check if two template keys represent contrasting actions.
///
/// The action is the part of a key after its last `.` (the whole key when
/// it has no `.`). Two keys contrast when one action is `"added"` and the
/// other is `"deleted"` or `"removed"`, in either order.
fn keys_contrast(a: &str, b: &str) -> bool {
    // Last dot-separated segment of a template key.
    fn action_of(key: &str) -> &str {
        key.rsplit_once('.').map_or(key, |(_, action)| action)
    }

    matches!(
        (action_of(a), action_of(b)),
        ("added", "deleted" | "removed") | ("deleted" | "removed", "added")
    )
}
1389
1390#[cfg(test)]
1391mod tests {
1392 use super::*;
1393
1394 #[test]
1395 fn first_mention_is_full() {
1396 let state = DiscourseState::new();
1397 assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1398 }
1399
1400 #[test]
1401 fn second_mention_is_pronoun_when_focused() {
1402 let mut state = DiscourseState::new();
1403 state.begin_render();
1404 state.mention_entity("UserService", "class");
1405
1406 state.begin_render();
1407 assert_eq!(state.reference_form("UserService"), ReferenceForm::Pronoun);
1408 }
1409
1410 #[test]
1411 fn ambiguity_prevents_pronoun() {
1412 let mut state = DiscourseState::new();
1413 state.begin_render();
1414 state.mention_entity("UserService", "class");
1415 state.mention_entity("AuthService", "class");
1416
1417 state.begin_render();
1418 // Both were mentioned recently — ambiguous, use short name
1419 assert_eq!(
1420 state.reference_form("UserService"),
1421 ReferenceForm::ShortName
1422 );
1423 }
1424
1425 #[test]
1426 fn distant_mention_reintroduces_full() {
1427 let mut state = DiscourseState::new();
1428 state.begin_render();
1429 state.mention_entity("UserService", "class");
1430
1431 // Advance several renders without mentioning it
1432 state.begin_render();
1433 state.begin_render();
1434 state.begin_render();
1435
1436 assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1437 }
1438
1439 #[test]
1440 fn reset_clears_all_state() {
1441 let mut state = DiscourseState::new();
1442 state.begin_render();
1443 state.mention_entity("UserService", "class");
1444 state.record_template_choice("code.renamed", 0);
1445
1446 state.reset();
1447
1448 assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1449 assert_eq!(state.last_template_variant("code.renamed"), None);
1450 assert!(state.is_first_render());
1451 }
1452
1453 #[test]
1454 fn template_history_tracks_last_variant() {
1455 let mut state = DiscourseState::new();
1456 state.record_template_choice("code.renamed", 2);
1457 assert_eq!(state.last_template_variant("code.renamed"), Some(2));
1458 }
1459
1460 #[test]
1461 fn connective_avoids_repetition() {
1462 let mut state = DiscourseState::new();
1463 let rel = DiscourseRelation::SameEntityDifferentAction;
1464
1465 let c1 = state.select_connective(&rel).unwrap();
1466 let c2 = state.select_connective(&rel).unwrap();
1467 let c3 = state.select_connective(&rel).unwrap();
1468
1469 assert_ne!(c1, c2);
1470 assert_ne!(c2, c3);
1471 assert_ne!(c1, c3);
1472 }
1473
1474 #[test]
1475 fn connective_recency_window_spans_mixed_relation_types() {
1476 let mut state = DiscourseState::new();
1477 let same_entity = DiscourseRelation::SameEntityDifferentAction;
1478 let same_action = DiscourseRelation::DifferentEntitySameAction;
1479 let contrast = DiscourseRelation::Contrast;
1480
1481 assert_eq!(state.select_connective(&same_entity), Some("Additionally,"));
1482 assert_eq!(state.select_connective(&contrast), Some("Meanwhile,"));
1483 assert_eq!(state.select_connective(&same_action), Some("Similarly,"));
1484 assert_eq!(state.select_connective(&contrast), Some("However,"));
1485
1486 // "Additionally," is still inside the six-entry recency window, so
1487 // the selector moves to the next unused same-entity connective.
1488 assert_eq!(state.select_connective(&same_entity), Some("Furthermore,"));
1489 }
1490
1491 #[test]
1492 fn connective_family_budget_drops_to_null_when_pool_saturates() {
1493 let mut state = DiscourseState::new();
1494 let rel = DiscourseRelation::SameEntityDifferentAction;
1495
1496 // Three-element continuation pool drains uniquely.
1497 assert_eq!(state.select_connective(&rel), Some("Additionally,"));
1498 assert_eq!(state.select_connective(&rel), Some("Furthermore,"));
1499 assert_eq!(state.select_connective(&rel), Some("It also"));
1500
1501 // Saturation: rather than recycling the LRU choice and producing
1502 // an Additionally,/Furthermore,/It also,/Additionally,... cycle,
1503 // the family budget suppresses the next emissions so the prose
1504 // dissolves into plain follow-on sentences.
1505 assert_eq!(state.select_connective(&rel), None);
1506 assert_eq!(state.select_connective(&rel), None);
1507 assert_eq!(state.select_connective(&rel), None);
1508
1509 // Once enough null slots accumulate inside the trailing window,
1510 // the budget reopens and the LRU pick resumes — Additionally
1511 // is the oldest emitted connector in `connective_history`.
1512 assert_eq!(state.select_connective(&rel), Some("Additionally,"));
1513 }
1514
1515 /// Regression for the service-shape prose Matt flagged: five follow-on
1516 /// sentences that all trigger DifferentEntitySameAction must NOT
1517 /// produce a `Similarly,/Likewise,/Similarly,/Likewise,/Similarly,`
1518 /// alternation. The two-element similarity pool can sustain at most
1519 /// two emissions inside the family window before the budget forces
1520 /// nulls so the pattern dissolves.
1521 #[test]
1522 fn similarity_family_budget_breaks_service_shape_alternation() {
1523 let mut state = DiscourseState::new();
1524 let rel = DiscourseRelation::DifferentEntitySameAction;
1525
1526 let emissions: Vec<Option<&'static str>> =
1527 (0..5).map(|_| state.select_connective(&rel)).collect();
1528
1529 let connectors: Vec<&'static str> = emissions.iter().filter_map(|e| *e).collect();
1530
1531 assert!(
1532 connectors.len() <= 2,
1533 "expected at most two similarity-family connectives across five \
1534 follow-on sentences, got {emissions:?}"
1535 );
1536
1537 // No A/B/A pattern: the third emission (if any) must not match
1538 // the connective two slots earlier.
1539 for window in emissions.windows(3) {
1540 if let (Some(a), Some(_), Some(c)) = (window[0], window[1], window[2]) {
1541 assert_ne!(
1542 a, c,
1543 "A/B/A alternation slipped through the budget: {emissions:?}"
1544 );
1545 }
1546 }
1547
1548 // Both members of the pool should appear at most once in the
1549 // surfaced connector list — the budget caps usage at pool.len()
1550 // = 2 distinct connectives, never two of the same.
1551 let similarly = connectors.iter().filter(|c| **c == "Similarly,").count();
1552 let likewise = connectors.iter().filter(|c| **c == "Likewise,").count();
1553 assert!(
1554 similarly <= 1 && likewise <= 1,
1555 "no similarity connector should repeat inside the family window: {emissions:?}"
1556 );
1557 }
1558
1559 #[test]
1560 fn no_connective_for_none_relation() {
1561 let mut state = DiscourseState::new();
1562 assert!(state.select_connective(&DiscourseRelation::None).is_none());
1563 }
1564
1565 /// Regression: repeated entity-less templates must not be classified
1566 /// as DifferentEntitySameAction — that yields spurious "Similarly,"
1567 /// connectives where no entity comparison is meaningful.
1568 #[test]
1569 fn entity_less_repeated_render_produces_no_relation() {
1570 let mut state = DiscourseState::new();
1571 state.begin_render();
1572 state.last_template_key = Some("code.added".to_string());
1573 state.last_entity_name = None;
1574
1575 assert_eq!(
1576 state.detect_relation("code.added", None),
1577 DiscourseRelation::None
1578 );
1579 }
1580
1581 /// Regression: only one side having an entity is also insufficient to
1582 /// infer either same-entity or different-entity relationships.
1583 #[test]
1584 fn one_sided_entity_presence_produces_no_relation() {
1585 let mut state = DiscourseState::new();
1586 state.begin_render();
1587 state.last_template_key = Some("t".to_string());
1588 state.last_entity_name = Some("Foo".to_string());
1589
1590 assert_eq!(state.detect_relation("t", None), DiscourseRelation::None);
1591 }
1592
1593 #[test]
1594 fn detect_same_entity_different_action() {
1595 let mut state = DiscourseState::new();
1596 state.begin_render();
1597 state.last_template_key = Some("code.renamed".to_string());
1598 state.last_entity_name = Some("Foo".to_string());
1599
1600 assert_eq!(
1601 state.detect_relation("code.deleted", Some("Foo")),
1602 DiscourseRelation::SameEntityDifferentAction
1603 );
1604 }
1605
1606 #[test]
1607 fn detect_different_entity_same_action() {
1608 let mut state = DiscourseState::new();
1609 state.begin_render();
1610 state.last_template_key = Some("code.renamed".to_string());
1611 state.last_entity_name = Some("Foo".to_string());
1612
1613 assert_eq!(
1614 state.detect_relation("code.renamed", Some("Bar")),
1615 DiscourseRelation::DifferentEntitySameAction
1616 );
1617 }
1618
1619 #[test]
1620 fn detect_contrast() {
1621 let mut state = DiscourseState::new();
1622 state.begin_render();
1623 state.last_template_key = Some("code.added".to_string());
1624 state.last_entity_name = Some("Foo".to_string());
1625
1626 assert_eq!(
1627 state.detect_relation("code.deleted", Some("Bar")),
1628 DiscourseRelation::Contrast
1629 );
1630 }
1631
1632 #[test]
1633 fn repetition_score_penalizes_recent_overlap() {
1634 let mut state = DiscourseState::new();
1635 state.begin_render();
1636 state.record_output_words("The class UserService was renamed to AccountService");
1637
1638 state.begin_render();
1639 let score_high =
1640 state.repetition_score("The class UserService was modified affecting AccountService");
1641 let score_low = state.repetition_score("AuthGuard removed from the application entirely");
1642
1643 assert!(score_high > score_low);
1644 }
1645
#[test]
fn sentence_rhythm_score_penalizes_recent_sentence_lengths() {
    // The recorded sentence has five whitespace-separated words.
    let mut discourse = DiscourseState::new();
    discourse.record_sentence_rhythm("Alpha changed after validation passed.");

    // One candidate repeats that length (five words); the other is longer
    // (eight words).
    let same_length = "Beta changed after review passed";
    let longer = "Beta changed after review passed and deployment resumed";
    let repeated_cadence = discourse.sentence_rhythm_score(same_length);
    let varied_cadence = discourse.sentence_rhythm_score(longer);

    assert!(
        repeated_cadence > varied_cadence,
        "same-length candidates should score worse than varied ones"
    );
}
1660
#[test]
fn sentence_rhythm_score_penalizes_same_side_runs() {
    // History: three short sentences of 3, 4 and 3 whitespace-separated
    // words, so the mean is 3.33 and the last recorded sentence (3 words)
    // sits below it. The fixture previously held 3/2/2-word sentences
    // (mean 2.33), which put the 3-word "same-side" candidate *above* the
    // mean and contradicted the scenario described here.
    //
    // Pivoting candidate: a single noticeably long sentence (above the
    // mean). It flips side relative to the last history entry, so the
    // same-side penalty is not expected to fire.
    //
    // Same-side candidate: another 3-word sentence (below the mean), on
    // the same side as the last history entry, so the burst-pivot penalty
    // is expected to fire.
    //
    // The same-side candidate also sits inside the recent cluster, so its
    // closeness cost is expected to be higher as well. Both effects push
    // the score in the same direction; the assertion only requires that
    // the same-side candidate ends up strictly worse.
    let mut state = DiscourseState::new();
    state.record_sentence_rhythm(
        "Alpha shipped today. Beta paused again today. Gamma shipped today.",
    );

    let pivoting = state.sentence_rhythm_score(
        "Delta shipped after the schema migration finished and the staging build went green",
    );
    let same_side = state.sentence_rhythm_score("Delta shipped today");

    assert!(
        same_side > pivoting,
        "same-side candidate ({same_side}) must score worse than pivoting candidate ({pivoting})"
    );
}
1691
#[test]
fn sentence_rhythm_score_pivot_penalty_does_not_dominate_repetition() {
    // Pins down that the rhythm term cannot overturn the repetition term
    // when the two are summed: a candidate that repeats the prior render's
    // vocabulary must still total worse than one with new vocabulary, and
    // the repetition gap must exceed the rhythm gap in magnitude.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.record_output_words("AuthService validated tokens against the registry");
    discourse.record_sentence_rhythm("AuthService validated tokens against the registry.");

    discourse.begin_render();
    // Longer than the recorded sentence (8 words against 6) and contains
    // every word of the prior render.
    let pivoting_repeats = "AuthService validated tokens against the registry yet again";
    // Shorter than the recorded sentence (4 words) and shares none of its
    // words.
    let same_side_clean = "PaymentGateway settled invoices nightly";

    let rep_pivot = discourse.repetition_score(pivoting_repeats);
    let rep_clean = discourse.repetition_score(same_side_clean);
    let rhy_pivot = discourse.sentence_rhythm_score(pivoting_repeats);
    let rhy_clean = discourse.sentence_rhythm_score(same_side_clean);

    // Combined score: repetition plus rhythm.
    let total_pivot = rep_pivot + rhy_pivot;
    let total_clean = rep_clean + rhy_clean;
    assert!(
        total_pivot > total_clean,
        "repetition-heavy pivoting candidate ({}) must still score worse \
         than the repetition-clean same-side candidate ({}); the burst-pivot \
         penalty is a tie-breaker, not a faithfulness override",
        total_pivot,
        total_clean,
    );

    // The rhythm-side gap alone must be smaller in magnitude than the
    // repetition-side gap.
    let rep_delta = rep_pivot - rep_clean;
    let rhy_delta = rhy_clean - rhy_pivot;
    assert!(
        rep_delta.abs() > rhy_delta.abs(),
        "repetition delta ({}) must dominate rhythm delta ({})",
        rep_delta,
        rhy_delta,
    );
}
1737
#[test]
fn sentence_rhythm_score_is_never_negative() {
    // Invariant under test: the rhythm score is never below zero. A future
    // change that introduces a reward (subtraction) must update this test
    // deliberately.
    let priors = [
        "Alpha shipped.",
        "Beta paused after the long postmortem dragged on.",
        "Gamma. Delta. Epsilon shipped after lunch.",
    ];
    let candidates = [
        "",
        "Zeta shipped.",
        "Zeta shipped after a careful review and a brief rollout window.",
        "Short. Long sentence with quite a few words inside it. Short again.",
    ];

    let mut discourse = DiscourseState::new();
    for prior in priors {
        discourse.record_sentence_rhythm(prior);
    }

    // Sweep empty, short, long and multi-sentence candidates.
    for candidate in candidates {
        let score = discourse.sentence_rhythm_score(candidate);
        assert!(
            score >= 0.0,
            "rhythm score must be non-negative (candidate `{candidate}`, score {score})"
        );
    }
}
1766
#[test]
fn sentence_rhythm_history_is_bounded() {
    // Seven two-word sentences are recorded in one call; the retained
    // history length must equal SENTENCE_RHYTHM_WINDOW afterwards.
    let seven_sentences = "One changed. Two changed. Three changed. Four changed. Five changed. Six changed. Seven changed.";
    let mut discourse = DiscourseState::new();
    discourse.record_sentence_rhythm(seven_sentences);

    let retained = discourse.sentence_length_history.len();
    assert_eq!(retained, SENTENCE_RHYTHM_WINDOW);
}
1776
1777 // --- Cb tracking tests (Phase 1) ---
1778
#[test]
fn cb_none_before_first_render() {
    // A freshly constructed state has no backward-looking center.
    let fresh = DiscourseState::new();
    assert!(fresh.cb.is_none());
}
1784
#[test]
fn cb_becomes_focus_after_first_render() {
    // One render mentioning only Foo: after advancing, Cb must be Foo.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity("Foo", "class");
    discourse.advance_cb();

    let cb = discourse.cb.as_deref();
    assert_eq!(cb, Some("Foo"));
}
1793
#[test]
fn cb_stays_on_continue_transition() {
    // Two consecutive renders that each mention only Foo.
    let mut discourse = DiscourseState::new();
    for _ in 0..2 {
        discourse.begin_render();
        discourse.mention_entity("Foo", "class");
        discourse.advance_cb();
    }

    let cb = discourse.cb.as_deref();
    assert_eq!(cb, Some("Foo"));
}
1805
#[test]
fn cb_shifts_to_prior_focus_on_new_entity_intro() {
    // Render 1 mentions Foo, so Cb becomes Foo (first render, nothing prior).
    // Render 2 mentions only Bar, an entity not seen before; Cb is expected
    // to remain on the prior focus, Foo.
    let mut discourse = DiscourseState::new();
    for name in ["Foo", "Bar"] {
        discourse.begin_render();
        discourse.mention_entity(name, "class");
        discourse.advance_cb();
    }

    let cb = discourse.cb.as_deref();
    assert_eq!(cb, Some("Foo"));
}
1819
#[test]
fn cb_shifts_to_current_on_retain() {
    // One entity per render, in this order:
    //   1-3: Foo, Foo, Foo
    //   4:   Bar (first time Bar is seen)
    //   5:   Foo (returning to an entity mentioned in earlier renders)
    // After render 5 the Cb is expected to be Foo.
    let render_sequence = ["Foo", "Foo", "Foo", "Bar", "Foo"];
    let mut discourse = DiscourseState::new();
    render_sequence.iter().for_each(|&name| {
        discourse.begin_render();
        discourse.mention_entity(name, "class");
        discourse.advance_cb();
    });

    let cb = discourse.cb.as_deref();
    assert_eq!(cb, Some("Foo"));
}
1836
#[test]
fn cb_reset_clears_state() {
    // Establish a Cb with one render, then do a full reset.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity("Foo", "class");
    discourse.advance_cb();

    discourse.reset();

    // Both the Cb and the remembered previous focus must be gone.
    assert!(discourse.cb.is_none());
    assert!(discourse.previous_focus.is_none());
}
1847
#[test]
fn reference_form_all_variants_distinct() {
    // Sanity check: each listed pair of variants must compare unequal.
    let distinct_pairs = [
        (ReferenceForm::Full, ReferenceForm::Zero),
        (ReferenceForm::Pronoun, ReferenceForm::Demonstrative),
        (ReferenceForm::Pronoun, ReferenceForm::Possessive),
        (ReferenceForm::Possessive, ReferenceForm::ShortName),
        (ReferenceForm::Zero, ReferenceForm::Demonstrative),
    ];
    for (left, right) in distinct_pairs {
        assert_ne!(left, right);
    }
}
1857
#[test]
fn list_style_cycles() {
    let mut discourse = DiscourseState::new();
    // Array elements are evaluated left to right, so this captures the
    // first four automatic picks in order.
    let picks = [
        discourse.next_list_style(),
        discourse.next_list_style(),
        discourse.next_list_style(),
        discourse.next_list_style(),
    ];

    // The first four picks must keep this order so existing golden tests
    // (e.g. document_render_preserves_list_style_cycle_across_paragraphs)
    // remain stable.
    let expected = [
        ListStyle::Including,
        ListStyle::SuchAs,
        ListStyle::Dash,
        ListStyle::Bracketed,
    ];
    assert_eq!(picks, expected);
}
1874
#[test]
fn list_style_cycle_visits_every_variant_within_palette_length() {
    // Taking LIST_STYLES.len() consecutive picks must surface that many
    // distinct styles; otherwise the palette has dead variants users
    // never see.
    let mut discourse = DiscourseState::new();
    let seen: std::collections::HashSet<ListStyle> = (0..LIST_STYLES.len())
        .map(|_| discourse.next_list_style())
        .collect();

    assert_eq!(
        seen.len(),
        LIST_STYLES.len(),
        "anti-repeat cycle dropped a variant: visited {seen:?}"
    );
}
1891
#[test]
fn list_style_anti_repeat_skips_recent_window() {
    // Invariant: an automatic pick never equals any of the (up to)
    // LIST_STYLE_RECENT_WINDOW picks made immediately before it. Walk a
    // long horizon (three full palette lengths) and check every pick.
    let mut state = DiscourseState::new();
    let mut history: Vec<ListStyle> = Vec::new();
    for _ in 0..(LIST_STYLES.len() * 3) {
        let style = state.next_list_style();
        // `saturating_sub` clamps the window start to 0, so the warm-up
        // picks (fewer than LIST_STYLE_RECENT_WINDOW entries in history)
        // are checked against whatever history exists instead of being
        // skipped.
        let window_start = history.len().saturating_sub(LIST_STYLE_RECENT_WINDOW);
        let recent = &history[window_start..];
        assert!(
            !recent.contains(&style),
            "style {style:?} repeated within recent window {recent:?} (history: {history:?})"
        );
        history.push(style);
    }
}
1910
#[test]
fn forced_list_style_blocks_immediate_auto_repeat() {
    // Record Bracketed as used twice in a row, then take an automatic
    // pick: it must not be Bracketed. Per the original author's note, the
    // failure mode guarded against is a pure-modulo cycle landing on the
    // just-forced style.
    let mut discourse = DiscourseState::new();
    for _ in 0..2 {
        discourse.record_list_style_used(ListStyle::Bracketed);
    }

    let auto = discourse.next_list_style();
    assert_ne!(auto, ListStyle::Bracketed);
}
1924
#[test]
fn forced_list_style_followed_by_auto_skips_window() {
    // The automatic cycle would start with Including (see
    // `list_style_cycles`). Pre-empt it by recording a forced Including;
    // the next automatic pick must then be something else.
    let mut discourse = DiscourseState::new();
    discourse.record_list_style_used(ListStyle::Including);

    let auto = discourse.next_list_style();
    assert_ne!(auto, ListStyle::Including);
}
1937
#[test]
fn reset_list_cycle_clears_recent_window_so_first_style_returns() {
    // Consume two automatic picks, reset the list cycle, and expect the
    // very next pick to be Including again.
    let mut discourse = DiscourseState::new();
    for _ in 0..2 {
        let _ = discourse.next_list_style();
    }
    discourse.reset_list_cycle();

    let first_after_reset = discourse.next_list_style();
    assert_eq!(first_after_reset, ListStyle::Including);
}
1947
1948 // --- Paragraph-reset invariants (preserve narrative-level anti-repeat,
1949 // clear paragraph-local pronoun/centering state) ---
1950
#[test]
fn paragraph_reset_clears_focus_entity_so_no_pronoun_leak() {
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity("UserService", "class");

    discourse.reset_for_paragraph();

    // After the paragraph reset the entity must be referred to in full
    // form again rather than pronominalized from a stale focus, and the
    // focus fields themselves must be cleared.
    let form = discourse.reference_form("UserService");
    assert_eq!(form, ReferenceForm::Full);
    assert!(discourse.focus_entity.is_none());
    assert!(!discourse.focus_is_plural);
}
1966
#[test]
fn paragraph_reset_clears_centering_state() {
    // Build up centering state with two renders that each mention Foo at
    // rank 0.
    let mut discourse = DiscourseState::new();
    for _ in 0..2 {
        discourse.begin_render();
        discourse.mention_entity_ranked("Foo", "class", 0);
        discourse.advance_cb();
    }

    discourse.reset_for_paragraph();

    // Cb, current Cf, previous Cf and the last transition are all cleared.
    assert!(discourse.cb().is_none());
    assert_eq!(discourse.cf().len(), 0);
    assert_eq!(discourse.previous_cf().len(), 0);
    assert_eq!(discourse.last_transition(), Transition::NoCb);
}
1984
#[test]
fn paragraph_reset_suppresses_cross_paragraph_relation_inference() {
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.last_entity_name = Some("Foo".into());
    discourse.last_template_key = Some("code.added".into());

    discourse.reset_for_paragraph();

    // A different key on the same entity in the next paragraph must not be
    // classified as a relation such as `Contrast` or
    // `SameEntityDifferentAction`. Per the original author's note, those
    // would emit a cross-paragraph "However,"/"Furthermore," that bridges
    // over the intentional paragraph break.
    let relation = discourse.detect_relation("code.deleted", Some("Foo"));
    assert_eq!(relation, DiscourseRelation::None);
}
2003
#[test]
fn paragraph_reset_preserves_template_variant_history() {
    let key = "code.renamed";
    let mut discourse = DiscourseState::new();
    discourse.record_template_choice(key, 2);

    discourse.reset_for_paragraph();

    // The recorded variant must survive the paragraph break so the next
    // paragraph does not immediately replay the variant just used.
    let remembered = discourse.last_template_variant(key);
    assert_eq!(remembered, Some(2));
}
2015
#[test]
fn paragraph_reset_preserves_word_repetition_penalty() {
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.record_output_words("AuthGuard removed authentication entirely");

    discourse.reset_for_paragraph();

    // Start the next paragraph's first render, then score a candidate
    // that reuses words recorded before the reset against one that does
    // not.
    discourse.begin_render();
    let reused_words = "AuthGuard authentication entirely was removed";
    let new_words = "Telemetry pipeline rebuilt cleanly";
    let overlap_score = discourse.repetition_score(reused_words);
    let unrelated_score = discourse.repetition_score(new_words);

    assert!(
        overlap_score > unrelated_score,
        "expected overlap score {overlap_score} to exceed unrelated {unrelated_score}",
    );
    assert!(
        overlap_score > 0.0,
        "word_history must persist across paragraph reset"
    );
}
2039
#[test]
fn paragraph_reset_preserves_render_index_so_demonstrative_continues() {
    let mut discourse = DiscourseState::new();

    // Paragraph 1: a single render.
    discourse.begin_render();
    discourse.mention_entity("Foo", "class");
    discourse.advance_cb();

    discourse.reset_for_paragraph();

    // Paragraph 2: first render.
    discourse.begin_render();

    // Per the original author's note, has_prior_render() drives the
    // `{noun|demonstrative}` choice between "this X" and "the X". Within a
    // single narrative "this" stays correct after a paragraph break; only
    // a full session reset returns to the introductory "the".
    let has_prior = discourse.has_prior_render();
    let is_first = discourse.is_first_render();
    assert!(has_prior);
    assert!(!is_first);
}
2059
#[test]
fn paragraph_reset_preserves_list_style_cycle() {
    let mut discourse = DiscourseState::new();
    // Two automatic picks before the paragraph break, in order.
    let before_reset = [discourse.next_list_style(), discourse.next_list_style()];

    discourse.reset_for_paragraph();
    let next_after_reset = discourse.next_list_style();

    // The cycle must NOT restart after a paragraph break: the next pick
    // differs from both picks made before it.
    for earlier in &before_reset {
        assert_ne!(&next_after_reset, earlier);
    }
}
2073
#[test]
fn full_reset_clears_anti_repeat_state() {
    // The full reset must clear everything. Per the original author's
    // note, anti-repeat continuity belongs to a narrative, not to the
    // session as a whole.
    let recorded_words = "alpha beta gamma";
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.record_template_choice("k", 1);
    discourse.record_output_words(recorded_words);

    discourse.reset();

    // The template variant history is gone.
    assert_eq!(discourse.last_template_variant("k"), None);

    // The very words recorded before the reset now score exactly zero.
    discourse.begin_render();
    let score_after_reset = discourse.repetition_score(recorded_words);
    assert_eq!(score_after_reset, 0.0);
}
2091
2092 // --- Cf and Transition tests (Phase 2 + Phase 3) ---
2093
#[test]
fn transition_no_cb_before_first_render() {
    // A freshly constructed state reports NoCb as its last transition.
    let fresh = DiscourseState::new();
    let transition = fresh.last_transition();
    assert_eq!(transition, Transition::NoCb);
}
2099
#[test]
fn transition_no_cb_when_no_entity() {
    // A render that mentions no entity at all still classifies as NoCb.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.advance_cb();

    let transition = discourse.last_transition();
    assert_eq!(transition, Transition::NoCb);
}
2107
#[test]
fn transition_nocb_after_first_mention() {
    // First render with a single mention. There is no previous Cb to
    // compare against, so the expected classification is NoCb. The
    // original author's notes record that SmoothShift was considered and
    // rejected for this case, because nothing precedes it to continue
    // from.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity("Foo", "class");
    discourse.advance_cb();

    let transition = discourse.last_transition();
    assert_eq!(transition, Transition::NoCb);
}
2125
#[test]
fn transition_continue_same_entity_and_cp() {
    // Render Foo twice and check the transition after each render:
    //   1st render: NoCb (nothing precedes it);
    //   2nd render: Continue (Cb stays Foo and Cp is Foo).
    let mut discourse = DiscourseState::new();
    for expected in [Transition::NoCb, Transition::Continue] {
        discourse.begin_render();
        discourse.mention_entity("Foo", "class");
        discourse.advance_cb();
        assert_eq!(discourse.last_transition(), expected);
    }
}
2141
2142 #[test]
2143 fn transition_continue_when_cp_and_cb_both_same() {
2144 let mut state = DiscourseState::new();
2145 state.begin_render();
2146 state.mention_entity_ranked("Foo", "class", 0);
2147 state.advance_cb();
2148
2149 state.begin_render();
2150 state.mention_entity_ranked("Foo", "class", 0);
2151 state.advance_cb();
2152 assert_eq!(state.last_transition(), Transition::Continue);
2153 }
2154
#[test]
fn transition_retain_when_cb_same_but_cp_differs() {
    let mut discourse = DiscourseState::new();

    // u1: Foo alone at rank 0.
    discourse.begin_render();
    discourse.mention_entity_ranked("Foo", "class", 0);
    discourse.advance_cb();

    // u2: Bar takes rank 0 (so Cp is Bar) while Foo is still realized at
    // rank 1. Foo is the only entity shared with the previous Cf, so Cb
    // stays Foo; Cb != Cp, hence Retain.
    discourse.begin_render();
    for (name, rank) in [("Bar", 0), ("Foo", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }
    discourse.advance_cb();

    let transition = discourse.last_transition();
    assert_eq!(transition, Transition::Retain);
}
2170
#[test]
fn transition_smooth_shift_new_entity() {
    // Despite the test name, the asserted classification is Retain.
    //
    // Original author's reasoning, condensed: u1 realizes only Foo and u2
    // realizes only Bar, so the two Cf lists share nothing. Bar is new, so
    // the Cb falls back to the previous focus (Foo). That yields
    // classify_transition(Some("Foo"), Some("Foo"), Some("Bar")): Cb equals
    // the previous Cb but differs from Cp, i.e. Retain. An earlier plan
    // expected SmoothShift here; it predates full Cf tracking.
    let mut discourse = DiscourseState::new();
    for name in ["Foo", "Bar"] {
        discourse.begin_render();
        discourse.mention_entity(name, "class");
        discourse.advance_cb();
    }

    let transition = discourse.last_transition();
    assert_eq!(transition, Transition::Retain);
}
2196
#[test]
fn transition_smooth_shift_new_cb_equals_cp() {
    // A genuine Smooth Shift needs the Cb to change AND the new Cb to be
    // the Cp, which takes three utterances to set up.
    let mut discourse = DiscourseState::new();

    // u1: Foo (rank 0). Cb = Foo; first render, so the transition is NoCb.
    discourse.begin_render();
    discourse.mention_entity_ranked("Foo", "class", 0);
    discourse.advance_cb();

    // u2: Bar (rank 0), Foo (rank 1). The only overlap with u1 is Foo, so
    // Cb = Foo while Cp = Bar: Retain.
    discourse.begin_render();
    for (name, rank) in [("Bar", 0), ("Foo", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }
    discourse.advance_cb();
    let after_u2 = discourse.last_transition();
    assert_eq!(after_u2, Transition::Retain);

    // u3: Bar alone (rank 0). Bar overlaps with u2's Cf, so Cb = Bar.
    // The previous Cb was Foo and Cp = Bar: Cb changed and Cb == Cp,
    // which is SmoothShift.
    discourse.begin_render();
    discourse.mention_entity_ranked("Bar", "class", 0);
    discourse.advance_cb();
    let after_u3 = discourse.last_transition();
    assert_eq!(after_u3, Transition::SmoothShift);
}
2225
#[test]
fn transition_rough_shift_proper() {
    let mut discourse = DiscourseState::new();

    // u1: Foo (rank 0). Cb = Foo, Cp = Foo; first render, so NoCb.
    discourse.begin_render();
    discourse.mention_entity_ranked("Foo", "class", 0);
    discourse.advance_cb();

    // u2: Bar (rank 0), Foo (rank 1). Foo is shared with u1's Cf, so
    // Cb = Foo. The previous Cb was Foo and Cp = Bar: Retain.
    discourse.begin_render();
    for (name, rank) in [("Bar", 0), ("Foo", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }
    discourse.advance_cb();
    let after_u2 = discourse.last_transition();
    assert_eq!(after_u2, Transition::Retain);

    // u3: Baz (rank 0), Bar (rank 1). Bar is shared with u2's Cf, so
    // Cb = Bar. The previous Cb was Foo and Cp = Baz: Cb changed and
    // Cb != Cp, which is RoughShift.
    discourse.begin_render();
    for (name, rank) in [("Baz", 0), ("Bar", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }
    discourse.advance_cb();
    let after_u3 = discourse.last_transition();
    assert_eq!(after_u3, Transition::RoughShift);
}
2254
#[test]
fn cf_deduplicates_by_name_keeping_lower_rank() {
    // Foo is mentioned twice in one render, higher rank first (2, then 0).
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    for rank in [2, 0] {
        discourse.mention_entity_ranked("Foo", "class", rank);
    }

    // A single Cf entry remains and it carries the lower rank.
    let centers = discourse.cf();
    assert_eq!(centers.len(), 1);
    assert_eq!(centers[0].rank, 0);
}
2265
#[test]
fn cf_deduplication_keeps_lower_rank_when_second_is_higher() {
    // Foo is mentioned twice in one render, lower rank first (0, then 2).
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    for rank in [0, 2] {
        discourse.mention_entity_ranked("Foo", "class", rank);
    }

    // A single Cf entry remains and the later, higher rank did not
    // overwrite the lower one.
    let centers = discourse.cf();
    assert_eq!(centers.len(), 1);
    assert_eq!(centers[0].rank, 0);
}
2276
#[test]
fn cf_sorts_by_rank_ascending() {
    // Mention order is deliberately not rank order: rank 1, rank 0, rank 2.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    for (name, rank) in [("Obj", 1), ("Subj", 0), ("Oblique", 2)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }

    // The Cf list comes back ordered by ascending rank.
    let centers = discourse.cf();
    let expected_order = ["Subj", "Obj", "Oblique"];
    for (position, expected_name) in expected_order.iter().enumerate() {
        assert_eq!(centers[position].name, *expected_name);
    }
}
2289
#[test]
fn cp_is_first_cf_entry() {
    // With Subj at rank 0 and Obj at rank 1, the first Cf entry (the Cp)
    // must be Subj.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    for (name, rank) in [("Subj", 0), ("Obj", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }

    let centers = discourse.cf();
    let preferred = &centers[0];
    assert_eq!(preferred.name, "Subj");
}
2298
#[test]
fn cf_cleared_by_begin_render() {
    let mut discourse = DiscourseState::new();

    // First render: one mention, so the current Cf holds one entry.
    discourse.begin_render();
    discourse.mention_entity_ranked("Foo", "class", 0);
    let entries_after_mention = discourse.cf().len();
    assert_eq!(entries_after_mention, 1);

    // Starting the next render empties the current Cf.
    discourse.begin_render();
    let entries_after_begin = discourse.cf().len();
    assert_eq!(
        entries_after_begin,
        0,
        "current_cf must be cleared by begin_render"
    );
}
2313
#[test]
fn previous_cf_set_after_advance_cb() {
    // One render with Foo (rank 0) and Bar (rank 1), then advance.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    for (name, rank) in [("Foo", 0), ("Bar", 1)] {
        discourse.mention_entity_ranked(name, "class", rank);
    }
    discourse.advance_cb();

    // After advancing, previous_cf() exposes both entries in rank order.
    let prev = discourse.previous_cf();
    assert_eq!(prev.len(), 2);
    for (position, expected_name) in ["Foo", "Bar"].iter().enumerate() {
        assert_eq!(prev[position].name, *expected_name);
    }
}
2327
#[test]
fn mention_entity_delegates_to_rank_zero() {
    // The unranked `mention_entity` must produce one Cf entry at rank 0.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity("Foo", "class");

    let centers = discourse.cf();
    assert_eq!(centers.len(), 1);
    assert_eq!(centers[0].rank, 0);
}
2337
#[test]
fn classify_transition_all_cases() {
    // Each row is (cb, prev_cb, cp, expected transition).
    let cases = [
        // Continue: cb == prev_cb AND cb == cp.
        (Some("Foo"), Some("Foo"), Some("Foo"), Transition::Continue),
        // Retain: cb == prev_cb, cb != cp.
        (Some("Foo"), Some("Foo"), Some("Bar"), Transition::Retain),
        // SmoothShift: cb != prev_cb, cb == cp.
        (Some("Bar"), Some("Foo"), Some("Bar"), Transition::SmoothShift),
        // RoughShift: cb != prev_cb, cb != cp.
        (Some("Bar"), Some("Foo"), Some("Baz"), Transition::RoughShift),
        // NoCb: no current cb.
        (None, Some("Foo"), Some("Bar"), Transition::NoCb),
        // NoCb with all None.
        (None, None, None, Transition::NoCb),
    ];

    for (cb, prev_cb, cp, expected) in cases {
        assert_eq!(classify_transition(cb, prev_cb, cp), expected);
    }
}
2368
#[test]
fn reset_clears_cf_and_transition_state() {
    // Populate the centering state with one render, then do a full reset.
    let mut discourse = DiscourseState::new();
    discourse.begin_render();
    discourse.mention_entity_ranked("Foo", "class", 0);
    discourse.advance_cb();

    discourse.reset();

    // Current Cf, previous Cf and the last transition all return to
    // their initial values.
    assert!(discourse.cf().is_empty());
    assert!(discourse.previous_cf().is_empty());
    assert_eq!(discourse.last_transition(), Transition::NoCb);
}
2381}