prosaic_core/
discourse.rs

1#[cfg(not(feature = "std"))]
2use alloc::string::{String, ToString};
3#[cfg(not(feature = "std"))]
4use alloc::vec::Vec;
5
6use crate::collections::{HashMap, HashSet, VecDeque, new_map, new_set};
7
/// One forward-looking center (Cf): an entity realized in an utterance,
/// paired with the salience rank of the grammatical role it filled.
///
/// Smaller ranks are more prominent: 0 is the Subject slot, 1 the Object,
/// 2 the Indirect Object / Location, and 3 or above an Oblique.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Cf {
    /// The entity's name, exactly as handed to `mention_entity` or
    /// `mention_entity_ranked`.
    pub name: String,
    /// Salience rank derived from the grammatical role. 0 (Subject) is the
    /// most prominent; larger values are less prominent.
    pub rank: u8,
}
21
/// Centering Theory transition between two consecutive utterances, after
/// Grosz, Joshi & Weinstein (1995).
///
/// Coherence preference, best first:
/// `Continue` > `Retain` > `SmoothShift` > `RoughShift`.
/// `NoCb` marks the cases where nothing could be classified: the first
/// render, the render after a reset, or an utterance without entities.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Transition {
    /// Cb(n) == Cb(n-1) and Cb(n) == Cp(n): the entity in focus stays in
    /// focus. Most coherent.
    Continue,
    /// Cb(n) == Cb(n-1) but Cb(n) != Cp(n): still coherent, though the
    /// center is no longer the most salient entity.
    Retain,
    /// Cb(n) != Cb(n-1) but Cb(n) == Cp(n): a clean shift of focus onto the
    /// new center.
    SmoothShift,
    /// Cb(n) != Cb(n-1) and Cb(n) != Cp(n): the least coherent shift.
    RoughShift,
    /// Nothing to classify: first render, post-reset, or no entities.
    NoCb,
}
43
/// Private word interner. Maps words to stable, densely allocated `u32` ids.
///
/// The interner stores and compares words exactly as given; it performs no
/// case folding of its own. Callers must pass already-lowercased input to
/// `intern`/`get`, otherwise differently-cased spellings of the same word
/// receive distinct ids.
#[derive(Debug, Clone, Default)]
struct WordInterner {
    /// Word → u32 id.
    by_word: HashMap<String, u32>,
    /// Reverse map for debugging. Indexed by id.
    by_id: Vec<String>,
}

impl WordInterner {
    /// Return the id for `word`, allocating the next free id the first time
    /// the word is seen. Ids are handed out in insertion order starting at 0.
    ///
    /// # Panics
    ///
    /// Panics if the number of distinct words no longer fits in a `u32`.
    /// Wrapping instead would silently alias two different words to one id.
    fn intern(&mut self, word: &str) -> u32 {
        if let Some(&id) = self.by_word.get(word) {
            return id;
        }
        let next = self.by_id.len();
        assert!(
            next <= u32::MAX as usize,
            "word interner exhausted the u32 id space"
        );
        // Checked above: `next` fits in a u32, so this cast cannot truncate.
        let id = next as u32;
        let owned = word.to_string();
        self.by_word.insert(owned.clone(), id);
        self.by_id.push(owned);
        id
    }

    /// Look up the id of an already-interned word without allocating one.
    /// Returns `None` when the word has never been interned.
    fn get(&self, word: &str) -> Option<u32> {
        self.by_word.get(word).copied()
    }
}
71
72/// Tracks discourse state across multiple render calls for natural output.
73///
74/// This is the engine's internal memory — it knows what entities were recently
75/// mentioned, what templates were recently used, what connectives were recently
76/// inserted, and what words appeared in recent output.
77#[derive(Debug, Clone)]
78pub struct DiscourseState {
79    /// Tracks entities by name → (entity_type, render_index_of_last_mention).
80    entities: HashMap<String, EntityMention>,
81
82    /// The current render index (incremented each render call).
83    render_index: usize,
84
85    /// The name of the most recently mentioned entity (for pronoun resolution).
86    focus_entity: Option<String>,
87
88    /// Last template variant index used per template key (for anti-repeat).
89    template_history: HashMap<String, usize>,
90
91    /// Recently used discourse connectives (ring buffer, max 6).
92    connective_history: VecDeque<String>,
93
94    /// Per-decision family slot for connective selection: `Some(family)`
95    /// when a connective was emitted, `None` when the family budget
96    /// suppressed one so the sentence ran plain. Tracked alongside
97    /// `connective_history` but including null slots so dense
98    /// same-family runs can be detected even when exact strings differ.
99    connective_family_history: VecDeque<Option<ConnectorFamily>>,
100
101    /// The template key used in the previous render (for relationship detection).
102    last_template_key: Option<String>,
103
104    /// The primary entity name from the previous render.
105    last_entity_name: Option<String>,
106
107    /// Non-stopword tokens from recent renders, with render_index.
108    /// Words are stored as interned `u32` ids — see `interner`.
109    /// Kept for a window of the last 5 renders.
110    word_history: VecDeque<(usize, HashSet<u32>)>,
111
112    /// Word counts from recently emitted sentences. Used to avoid a flat
113    /// mid-length cadence when multiple template variants are available.
114    sentence_length_history: VecDeque<usize>,
115
116    /// Word interner shared across all render history. Lowercasing happens
117    /// once at intern time; all subsequent lookups use pre-lowercased ids.
118    interner: WordInterner,
119
120    /// Pre-interned ids for every stopword in `STOPWORDS`. Populated once
121    /// during construction so `record_output_words` never scans strings.
122    stopword_ids: HashSet<u32>,
123
124    /// Monotonic cycle index used by [`Self::next_list_style`]. The selected
125    /// style is found by walking `LIST_STYLES` from this index forward,
126    /// skipping any style currently in `recent_list_styles`. The index is
127    /// advanced past the picked slot.
128    ///
129    /// Persists across paragraph-boundary resets so consecutive paragraphs
130    /// rotate through the list-style pool instead of restarting at the same
131    /// phrasing every time. This mirrors the cross-paragraph semantics of
132    /// `Session::last_temporal_anchor`. Use [`Self::reset_list_cycle`] (or
133    /// the [`DiscourseState::reset`] hard reset) to clear it.
134    last_list_style: usize,
135
136    /// Trailing window of recently chosen list styles, capped at
137    /// [`LIST_STYLE_RECENT_WINDOW`]. Both auto-picked and explicitly forced
138    /// styles are recorded here so the next auto pick deterministically
139    /// avoids them. Persists across paragraph resets alongside
140    /// `last_list_style`; cleared by [`Self::reset_list_cycle`] and the
141    /// full [`Self::reset`].
142    recent_list_styles: VecDeque<ListStyle>,
143
144    /// Whether the current focus is a compound/plural subject, so pronoun
145    /// continuations should use "they/them" instead of "it".
146    focus_is_plural: bool,
147
148    /// Backward-looking center for the NEXT render. Updated at the end of each
149    /// successful render via `advance_cb`. `None` before the first render, after
150    /// a reset, or when no coherent transition is available (Rough Shift).
151    cb: Option<String>,
152
153    /// Focus entity of the render immediately before the current one. Used to
154    /// compute Cb transitions. Different from `focus_entity`: that tracks the
155    /// current render's focus; this tracks what `focus_entity` was at the point
156    /// `advance_cb` was last called.
157    previous_focus: Option<String>,
158
159    /// Forward-looking centers being built during the CURRENT render.
160    /// Populated by `mention_entity_ranked`, cleared by `begin_render`.
161    /// Ordered by rank ascending (lowest rank first); ties broken by insertion
162    /// order. The first element is the Cp (preferred center).
163    current_cf: Vec<Cf>,
164
165    /// Forward-looking centers from the PREVIOUS render. Set by
166    /// `compute_cb_transition` as a snapshot of `current_cf`. Used to
167    /// identify the Cb as the highest-ranked Cf member shared with the
168    /// previous utterance.
169    previous_cf: Vec<Cf>,
170
171    /// Transition classification computed by the most recent `advance_cb`
172    /// call. `Transition::NoCb` before any render or after a reset.
173    last_transition: Transition,
174
175    /// List style chosen by the most recent `|join` pipe during the
176    /// current render. `None` when no `|join` fired. Cleared at the
177    /// start of every render so [`RenderExplanation`] always reports
178    /// the value for *this* render.
179    last_list_style_used: Option<ListStyle>,
180
181    /// Whether the most recent render's Silent-mode cleanup stripped
182    /// any trailing orphan words. Cleared at the start of every render.
183    /// Exposed via [`RenderExplanation::cleanup_stripped_tail`].
184    last_cleanup_stripped_tail: bool,
185}
186
/// Bookkeeping kept for each entity named during the discourse.
#[derive(Debug, Clone)]
struct EntityMention {
    /// Entity type supplied with the most recent mention.
    entity_type: String,
    /// Render index at which the entity was last mentioned.
    last_mentioned: usize,
    /// How many times the entity has been mentioned so far.
    mention_count: usize,
}
193
/// The surface form chosen for an entity reference, given discourse context.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ReferenceForm {
    /// Full introduction, e.g. "The class UserService".
    Full,
    /// Bare name, e.g. "UserService".
    ShortName,
    /// Pronoun, e.g. "it" / "they" (or the language-specific equivalent).
    Pronoun,
    /// Demonstrative determiner plus type, e.g. "this class" (or the
    /// language-specific equivalent). A reserved slot for future discourse
    /// rules; `DiscourseState::reference_form` does not emit it today.
    Demonstrative,
    /// Possessive pronoun/determiner, e.g. "its" / "their" (or the
    /// language-specific equivalent). Produced by the `{name|possessive}`
    /// pipe once the standard discourse policy has decided a pronoun-form
    /// reference is appropriate.
    Possessive,
    /// Zero realization: the slot emits nothing. For pro-drop languages
    /// (Japanese, colloquial Spanish/Italian) where the pronoun is
    /// recoverable from context. The default
    /// `DiscourseState::reference_form` does not emit it;
    /// language-specific discourse extensions may.
    Zero,
}
219
/// How the current render relates to the one immediately before it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiscourseRelation {
    /// The same entity appears again, but with a different action.
    SameEntityDifferentAction,
    /// A different entity appears with the same type of action.
    DifferentEntitySameAction,
    /// The two actions contrast with each other (e.g. add vs delete).
    Contrast,
    /// No relationship was detected.
    None,
}
232
/// Phrasing used when rendering a (possibly truncated) list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListStyle {
    /// "including A, B, and C among others"
    Including,
    /// "such as A, B, and C"
    SuchAs,
    /// "— notably A, B, and C, plus N others"
    Dash,
    /// "[A, B, and C, and N more]" (the original format)
    Bracketed,
    /// "A, B, and C, among others": postfix qualifier that drops the
    /// remainder count.
    AmongOthers,
    /// "A, B, and C, to name a few": postfix qualifier that drops the
    /// remainder count.
    ToNameAFew,
    /// "A, B, and C, plus N more": postfix qualifier that uses the
    /// remainder count.
    PlusMore,
}
252
/// Maximum number of connectives remembered in `connective_history`.
const CONNECTIVE_WINDOW: usize = 6;

/// Length of the sliding window behind the connector-family budget. A
/// family may contribute at most `pool.len()` emissions inside this window;
/// beyond that `select_connective` returns `None` and the next follow-on
/// sentence renders plain. The size leaves two or three null slots after a
/// fully saturated pool, which is what breaks up a `Similarly,/Likewise,`
/// style alternation in service-shape prose.
const FAMILY_WINDOW: usize = 5;

/// Score deduction for a candidate that would complete an A/B/A alternation
/// with the two immediately preceding emissions. Unused candidates score a
/// distance of `CONNECTIVE_WINDOW + 1`, so the penalty is big enough to push
/// a recently seen alternation partner below any unused option, yet small
/// enough to leave the LRU recycle order (A,B,C → A,B,C) untouched when the
/// pool offers a third choice.
const ALTERNATION_PENALTY: i64 = 2;

/// Number of recent renders whose words are kept in `word_history`.
const WORD_HISTORY_WINDOW: usize = 5;
/// Window constant for sentence-rhythm (sentence length) history.
const SENTENCE_RHYTHM_WINDOW: usize = 6;
/// Render distance at or beyond which an entity is reintroduced in full form.
const ENTITY_REINTRODUCE_DISTANCE: usize = 3;

/// Penalty per sentence when consecutive sentences fall on the same side of
/// the running mean length. Deliberately small next to the closeness
/// (max 3.0) and mean-delta (max 1.0) contributions, so it works as a
/// cadence tie-breaker instead of dominating the rhythm score.
const SAME_SIDE_PENALTY: f64 = 0.75;
281
/// Width (in words) of the neutral band around the mean. A sentence whose
/// length differs from the mean by less than this counts as "at the mean"
/// and gives no same-side signal, which avoids spurious pivots when lengths
/// sit on, or fractionally beside, the mean.
const SIDE_OF_MEAN_NEUTRAL_BAND: f64 = 0.5;

/// Which side of the running mean a sentence length falls on.
#[derive(Copy, Clone, PartialEq, Eq)]
enum CadenceSide {
    Above,
    Below,
}

/// Classify `len` against `mean`.
///
/// Returns `None` when the length lies strictly inside the neutral band
/// (`|len - mean| < SIDE_OF_MEAN_NEUTRAL_BAND`), `Some(Above)` when it is
/// longer than the mean, and `Some(Below)` in every remaining case.
fn side_of_mean(len: f64, mean: f64) -> Option<CadenceSide> {
    match len - mean {
        offset if offset.abs() < SIDE_OF_MEAN_NEUTRAL_BAND => None,
        offset if offset > 0.0 => Some(CadenceSide::Above),
        _ => Some(CadenceSide::Below),
    }
}
303
/// Stopwords left out of the word frequency map.
///
/// Entry order is significant: `DiscourseState::new` interns these in
/// sequence, so reordering would change the ids they receive.
const STOPWORDS: &[&str] = &[
    // Articles, conjunctions, prepositions.
    "a", "an", "the", "and", "or", "but",
    "in", "on", "at", "to", "for", "of", "with", "by", "from",
    // Forms of "be", "have", "do".
    "is", "was", "are", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did",
    // Modals and negation.
    "will", "would", "could", "should", "may", "might", "shall", "can",
    "not", "no",
    // Pronouns, determiners, question words.
    "it", "its", "this", "that", "these", "those",
    "which", "who", "what", "where", "when", "how",
    // Subordinators, particles, adverbs.
    "if", "then", "than", "so", "as", "up", "out", "into",
    "also", "just", "more", "most",
];
312
313const LIST_STYLES: &[ListStyle] = &[
314    ListStyle::Including,
315    ListStyle::SuchAs,
316    ListStyle::Dash,
317    ListStyle::Bracketed,
318    ListStyle::AmongOthers,
319    ListStyle::ToNameAFew,
320    ListStyle::PlusMore,
321];
322
323/// Number of recent list-style picks remembered for anti-repeat. Each call to
324/// [`DiscourseState::next_list_style`] (and explicit recordings via
325/// [`DiscourseState::record_list_style_used`]) skips any style that appears in
326/// the trailing window, so consecutive truncated lists never repeat phrasing
327/// even when a forced style and the auto-cycle would otherwise collide.
328const LIST_STYLE_RECENT_WINDOW: usize = 2;
329
/// Connective pool for the same-entity / different-action relation
/// (continuation).
const SAME_ENTITY_CONNECTIVES: &[&str] = &["Additionally,", "Furthermore,", "It also"];

/// Connective pool for the different-entity / same-action relation
/// (similarity).
const SAME_ACTION_CONNECTIVES: &[&str] = &["Similarly,", "Likewise,"];

/// Connective pool for contrasting actions.
const CONTRAST_CONNECTIVES: &[&str] = &["Meanwhile,", "However,", "On the other hand,"];
336
337/// Number of distinct list styles in the cycle.
338pub(crate) fn list_styles_count() -> usize {
339    LIST_STYLES.len()
340}
341
/// Lexical family of a connector. Exact-string anti-repeat only sees
/// individual connectors; the family lets the budget reason about whole
/// categories (similarity / continuation / contrast), so a two-element pool
/// cannot lock the prose into an A/B/A/B alternation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConnectorFamily {
    /// Continuation/expansion: "Additionally,", "Furthermore,", "It also".
    Continuation,
    /// Similarity: "Similarly,", "Likewise,".
    Similarity,
    /// Contrast: "Meanwhile,", "However,", "On the other hand,".
    Contrast,
}
355
356fn family_for_relation(relation: &DiscourseRelation) -> Option<ConnectorFamily> {
357    match relation {
358        DiscourseRelation::SameEntityDifferentAction => Some(ConnectorFamily::Continuation),
359        DiscourseRelation::DifferentEntitySameAction => Some(ConnectorFamily::Similarity),
360        DiscourseRelation::Contrast => Some(ConnectorFamily::Contrast),
361        DiscourseRelation::None => None,
362    }
363}
364
365/// Map a connective string to its lexical family by membership in the
366/// per-relation pools. Returns `None` for strings outside the known set
367/// (e.g. discourse markers from `Default-Language`); those don't count
368/// toward the family-budget gate, matching the engine's accounting.
369fn family_for_connective(connective: &str) -> Option<ConnectorFamily> {
370    if SAME_ENTITY_CONNECTIVES.contains(&connective) {
371        Some(ConnectorFamily::Continuation)
372    } else if SAME_ACTION_CONNECTIVES.contains(&connective) {
373        Some(ConnectorFamily::Similarity)
374    } else if CONTRAST_CONNECTIVES.contains(&connective) {
375        Some(ConnectorFamily::Contrast)
376    } else {
377        None
378    }
379}
380
381impl DiscourseState {
382    pub fn new() -> Self {
383        let mut interner = WordInterner::default();
384        // Pre-intern all stopwords so membership checks are O(1) u32 lookups.
385        let stopword_ids: HashSet<u32> = STOPWORDS.iter().map(|&w| interner.intern(w)).collect();
386
387        Self {
388            entities: new_map(),
389            render_index: 0,
390            focus_entity: None,
391            template_history: new_map(),
392            connective_history: VecDeque::new(),
393            connective_family_history: VecDeque::new(),
394            last_template_key: None,
395            last_entity_name: None,
396            word_history: VecDeque::new(),
397            sentence_length_history: VecDeque::new(),
398            interner,
399            stopword_ids,
400            last_list_style: 0,
401            recent_list_styles: VecDeque::with_capacity(LIST_STYLE_RECENT_WINDOW),
402            last_list_style_used: None,
403            last_cleanup_stripped_tail: false,
404            focus_is_plural: false,
405            cb: None,
406            previous_focus: None,
407            current_cf: Vec::new(),
408            previous_cf: Vec::new(),
409            last_transition: Transition::NoCb,
410        }
411    }
412
413    /// Mark the current focus as a compound/plural subject so the next
414    /// pronoun reference uses "they" rather than "it".
415    pub fn set_focus_plural(&mut self, plural: bool) {
416        self.focus_is_plural = plural;
417    }
418
419    /// Whether the current focus is a plural/compound subject.
420    pub fn focus_is_plural(&self) -> bool {
421        self.focus_is_plural
422    }
423
424    /// Clear ALL discourse state, including the cross-paragraph list-style
425    /// cycle counter. Use when starting a fully unrelated narrative — most
426    /// callers want [`Self::reset_for_paragraph`] instead so consecutive
427    /// paragraphs continue to rotate list-style phrasings.
428    pub fn reset(&mut self) {
429        *self = Self::new();
430    }
431
432    /// Clear discourse state at a paragraph boundary while preserving the
433    /// narrative-level stylistic anti-repeat machinery. This is the reset
434    /// used by [`Session::reset_for_paragraph`] so multi-paragraph narratives
435    /// don't restart variant cycles, list-style rotation, word-repetition
436    /// penalties, or sentence-rhythm memory on every paragraph break.
437    ///
438    /// **Preserved (narrative-level):** `last_list_style` and
439    /// `recent_list_styles` (list-style cycle plus anti-repeat window),
440    /// `template_history` (variant anti-repeat), `connective_history`
441    /// (connective anti-repeat), `word_history` plus `interner` (repetition
442    /// scoring), `sentence_length_history` (cadence/rhythm scoring),
443    /// `render_index` (so word-history distances stay correct and
444    /// `has_prior_render` keeps reporting earlier discourse exists).
445    ///
446    /// **Cleared (paragraph-local):** the entity table, focus entity and its
447    /// plurality, `last_template_key`/`last_entity_name` (so cross-paragraph
448    /// relation/connective inference is suppressed), the Centering Theory
449    /// `Cb`/`Cf` machinery (`cb`, `previous_focus`, `current_cf`,
450    /// `previous_cf`, `last_transition`), and per-render diagnostic signals.
451    ///
452    /// The clearance set is the load-bearing invariant: anaphora must not
453    /// resolve to entities introduced in an earlier paragraph, and rhetorical
454    /// connectives ("Furthermore,", "However,") must not jump paragraph
455    /// boundaries.
456    pub fn reset_for_paragraph(&mut self) {
457        // Pronoun/anaphora sources.
458        self.entities.clear();
459        self.focus_entity = None;
460        self.focus_is_plural = false;
461        // Relation-detection inputs (drive cross-render connective insertion).
462        self.last_template_key = None;
463        self.last_entity_name = None;
464        // Centering Theory state.
465        self.cb = None;
466        self.previous_focus = None;
467        self.current_cf.clear();
468        self.previous_cf.clear();
469        self.last_transition = Transition::NoCb;
470        // Per-render diagnostics.
471        self.last_list_style_used = None;
472        self.last_cleanup_stripped_tail = false;
473        // Intentionally retained: last_list_style, recent_list_styles,
474        // template_history, connective_history,
475        // connective_family_history, word_history,
476        // sentence_length_history, interner, stopword_ids, render_index.
477    }
478
479    /// Clear only the list-style cycle counter and its anti-repeat window.
480    /// Mirrors [`Session::reset_temporal`] for callers that want to start a
481    /// fresh list-style rotation without otherwise resetting discourse state.
482    pub fn reset_list_cycle(&mut self) {
483        self.last_list_style = 0;
484        self.recent_list_styles.clear();
485    }
486
487    /// Advance to the next render. Must be called at the start of each render.
488    pub fn begin_render(&mut self) {
489        self.render_index += 1;
490        self.current_cf.clear();
491        // Reset per-render diagnostic signals so `RenderExplanation`
492        // always reports the value for THIS render rather than inheriting
493        // state from a previous one.
494        self.last_list_style_used = None;
495        self.last_cleanup_stripped_tail = false;
496    }
497
498    /// Record that an entity was mentioned in the current render at rank 0
499    /// (Subject position). Delegates to [`Self::mention_entity_ranked`].
500    ///
501    /// Resets the focus-plural flag — compound subjects must mark
502    /// themselves explicitly via [`Self::set_focus_plural`].
503    pub fn mention_entity(&mut self, name: &str, entity_type: &str) {
504        self.mention_entity_ranked(name, entity_type, 0);
505    }
506
507    /// Record that an entity was mentioned in the current render with an
508    /// explicit grammatical-role rank. Lower rank = more prominent.
509    ///
510    /// Rank convention:
511    /// - 0: Subject (most prominent — the Cp candidate)
512    /// - 1: Direct Object
513    /// - 2: Indirect Object / Location
514    /// - 3+: Oblique / other
515    ///
516    /// The entity is inserted into `current_cf` in rank-ascending order.
517    /// If the entity is already in the Cf list, the lower of the two ranks
518    /// is kept (a subject mention always beats an object mention).
519    ///
520    /// `focus_entity` is updated when rank == 0 or when no focus has been
521    /// set yet for this render; this keeps the Cp semantics: the Subject is
522    /// the preferred center.
523    pub fn mention_entity_ranked(&mut self, name: &str, entity_type: &str, rank: u8) {
524        let entry = self
525            .entities
526            .entry(name.to_string())
527            .or_insert(EntityMention {
528                entity_type: entity_type.to_string(),
529                last_mentioned: 0,
530                mention_count: 0,
531            });
532        entry.last_mentioned = self.render_index;
533        entry.mention_count += 1;
534        entry.entity_type = entity_type.to_string();
535
536        // Update focus_entity (= Cp) when this is the most prominent slot
537        // (rank 0) or when no focus has been established yet this render.
538        if rank == 0 || self.focus_entity.is_none() {
539            self.focus_entity = Some(name.to_string());
540            self.last_entity_name = Some(name.to_string());
541            self.focus_is_plural = false;
542        }
543
544        // Insert into current_cf, deduplicating by name (keep lower rank).
545        if let Some(existing) = self.current_cf.iter_mut().find(|c| c.name == name) {
546            if rank < existing.rank {
547                existing.rank = rank;
548                // Re-sort after rank update.
549                self.current_cf.sort_by_key(|c| c.rank);
550            }
551        } else {
552            self.current_cf.push(Cf {
553                name: name.to_string(),
554                rank,
555            });
556            // Sort stably so Cp = first element.
557            self.current_cf.sort_by_key(|c| c.rank);
558        }
559    }
560
561    /// Profile-aware variant of [`Self::reference_form`].
562    ///
563    /// `PronounDensity::Default` is identical to `reference_form`. `Low`
564    /// demotes any computed `Pronoun` to `ShortName`, biasing toward
565    /// formal register that keeps full names visible longer. `High`
566    /// promotes a `ShortName` to `Pronoun` when the entity is recent
567    /// enough (distance ≤ 2) and not in an ambiguity context — biasing
568    /// toward conversational register.
569    pub fn reference_form_with_density(
570        &self,
571        name: &str,
572        density_low: bool,
573        density_high: bool,
574    ) -> ReferenceForm {
575        let raw = self.reference_form(name);
576        if density_low {
577            return match raw {
578                ReferenceForm::Pronoun => ReferenceForm::ShortName,
579                other => other,
580            };
581        }
582        if density_high && raw == ReferenceForm::ShortName && self.is_pronoun_eligible_relaxed(name)
583        {
584            return ReferenceForm::Pronoun;
585        }
586        raw
587    }
588
589    fn is_pronoun_eligible_relaxed(&self, name: &str) -> bool {
590        let Some(mention) = self.entities.get(name) else {
591            return false;
592        };
593        let distance = self.render_index.saturating_sub(mention.last_mentioned);
594        if distance == 0 || distance > 2 {
595            return false;
596        }
597        if self.has_ambiguity(name) {
598            return false;
599        }
600        true
601    }
602
603    /// Determine how to refer to an entity given discourse history.
604    pub fn reference_form(&self, name: &str) -> ReferenceForm {
605        let mention = match self.entities.get(name) {
606            Some(m) => m,
607            None => return ReferenceForm::Full,
608        };
609
610        let distance = self.render_index.saturating_sub(mention.last_mentioned);
611
612        // If it's been too long, reintroduce with full form.
613        if distance >= ENTITY_REINTRODUCE_DISTANCE {
614            return ReferenceForm::Full;
615        }
616
617        // Candidate for pronoun under existing distance/focus/ambiguity rules.
618        let pronoun_candidate = distance == 1
619            && self.focus_entity.as_deref() == Some(name)
620            && !self.has_ambiguity(name);
621
622        if pronoun_candidate {
623            // Centering Theory Rule 1 gate:
624            //   If any element of Cf(Ui) is realized as a pronoun in Ui+1,
625            //   then the Cb(Ui+1) must also be realized as a pronoun.
626            //
627            // Practically: only pronominalize when the referent IS the Cb, or
628            // when there is no Cb yet (fresh discourse / post-reset / first
629            // named entity). If the Cb is a *different* entity, demoting to
630            // ShortName avoids an ambiguous pronoun resolution.
631            match self.cb.as_deref() {
632                // No Cb yet (first render or post-reset) — fall through to pronoun.
633                None => return ReferenceForm::Pronoun,
634                // Referent IS the Cb — Rule 1 permits pronominalization.
635                Some(cb_name) if cb_name == name => return ReferenceForm::Pronoun,
636                // Referent is NOT the Cb — Rule 1 demotes to ShortName to
637                // prevent an ambiguous pronoun whose referent is the Cb entity.
638                Some(_) => return ReferenceForm::ShortName,
639            }
640        }
641
642        // Short name for entities mentioned recently but not pronoun-eligible.
643        if distance > 0 && distance < ENTITY_REINTRODUCE_DISTANCE {
644            return ReferenceForm::ShortName;
645        }
646
647        ReferenceForm::Full
648    }
649
650    /// Check if there are multiple recently-mentioned entities that could cause
651    /// ambiguity when using a pronoun.
652    fn has_ambiguity(&self, name: &str) -> bool {
653        let recent_count = self
654            .entities
655            .iter()
656            .filter(|(n, m)| {
657                n.as_str() != name && self.render_index.saturating_sub(m.last_mentioned) <= 2
658            })
659            .count();
660        recent_count > 0
661    }
662
663    /// Record which template variant was selected for anti-repeat.
664    pub fn record_template_choice(&mut self, key: &str, variant_index: usize) {
665        self.template_history.insert(key.to_string(), variant_index);
666        self.last_template_key = Some(key.to_string());
667    }
668
669    /// Get the last variant index used for a key (to avoid repeating it).
670    pub fn last_template_variant(&self, key: &str) -> Option<usize> {
671        self.template_history.get(key).copied()
672    }
673
674    /// Detect the relationship between the current render and the previous one.
675    ///
676    /// Both entities must be present (and comparable) to assert a "same
677    /// entity" or "different entity" relationship — otherwise the engine
678    /// would incorrectly emit e.g. a *Similarly,* connective for a
679    /// repeated entity-less template, where no entity comparison is
680    /// actually meaningful.
681    pub fn detect_relation(
682        &self,
683        current_key: &str,
684        current_entity: Option<&str>,
685    ) -> DiscourseRelation {
686        let last_key = match &self.last_template_key {
687            Some(k) => k.as_str(),
688            None => return DiscourseRelation::None,
689        };
690
691        let last_entity = self.last_entity_name.as_deref();
692        let both_have_entities = current_entity.is_some() && last_entity.is_some();
693        let same_entity = both_have_entities && current_entity == last_entity;
694        let different_entity = both_have_entities && current_entity != last_entity;
695
696        let same_action = keys_share_action(current_key, last_key);
697        let contrasting = keys_contrast(current_key, last_key);
698
699        if same_entity && !same_action {
700            DiscourseRelation::SameEntityDifferentAction
701        } else if different_entity && same_action {
702            DiscourseRelation::DifferentEntitySameAction
703        } else if contrasting && both_have_entities {
704            DiscourseRelation::Contrast
705        } else {
706            DiscourseRelation::None
707        }
708    }
709
710    /// Select a discourse connective for the given relation, preferring
711    /// candidates absent from recent history. Three deterministic
712    /// guardrails layer on top of the LRU pick:
713    ///
714    /// 1. **Connector-family budget.** Each pool maps to a lexical family
715    ///    (continuation, similarity, contrast). When the family already
716    ///    contributes `pool.len()` emissions inside the trailing
717    ///    `FAMILY_WINDOW`, return `None` so the next sentence renders
718    ///    plain. This is the lever that breaks the
719    ///    `Similarly,/Likewise,/Similarly,/Likewise,` pattern Matt flagged
720    ///    in service-shape prose: the two-element similarity pool is
721    ///    forced to alternate after two emissions, so the third call
722    ///    drops the connective entirely.
723    /// 2. **Exact-connector cooldown.** The immediately preceding
724    ///    connective is excluded from candidacy when the pool offers an
725    ///    alternative — preserves the existing back-to-back anti-repeat.
726    /// 3. **A/B alternation penalty.** Candidates equal to
727    ///    `connective_history[len-2]` take a score deduction so the LRU
728    ///    pick will not extend an A/B pattern into A/B/A when a fresh
729    ///    option exists. For three-element pools this preserves the
730    ///    A,B,C cycle; for two-element pools the family budget kicks in
731    ///    first and the penalty is moot.
732    pub fn select_connective(&mut self, relation: &DiscourseRelation) -> Option<&'static str> {
733        self.select_connective_filtered(relation, None, None, None)
734    }
735
736    /// Profile-aware variant of [`Self::select_connective`].
737    ///
738    /// `allowed` (when `Some`) restricts the candidate pool to connectives
739    /// also present in the slice. If the resulting pool is empty (every
740    /// allowed entry was filtered by the existing anti-repeat or family
741    /// budget logic, OR no allowed entries match the base pool at all),
742    /// the engine falls back to the unfiltered base pool — profile
743    /// preferences are biases, never hard constraints.
744    ///
745    /// `preferred` (when `Some`) adds a per-connective tie-breaker bonus
746    /// to the existing distance/alternation score. Weights are interpreted
747    /// in `0.0..=1.0` and scaled by 10 to land in the same rough magnitude
748    /// as the existing scoring terms.
749    ///
750    /// `forbidden` (when `Some`) is a strict subtractive filter applied
751    /// *after* the allowed/fallback computation — used by the
752    /// retrospective refine pass for `BlacklistConnective` constraints.
753    /// Unlike `allowed`, an empty post-`forbidden` pool emits `None`
754    /// rather than falling back: that's the whole point of a blacklist.
755    pub fn select_connective_filtered(
756        &mut self,
757        relation: &DiscourseRelation,
758        allowed: Option<&[&str]>,
759        preferred: Option<&[(&str, f32)]>,
760        forbidden: Option<&[&str]>,
761    ) -> Option<&'static str> {
762        let base_pool: &[&'static str] = match relation {
763            DiscourseRelation::SameEntityDifferentAction => SAME_ENTITY_CONNECTIVES,
764            DiscourseRelation::DifferentEntitySameAction => SAME_ACTION_CONNECTIVES,
765            DiscourseRelation::Contrast => CONTRAST_CONNECTIVES,
766            DiscourseRelation::None => return None,
767        };
768        let family = family_for_relation(relation)
769            .expect("non-None relation always maps to a connector family");
770
771        // Apply the profile-allowed filter when one is supplied. An empty
772        // post-filter pool falls through to the base pool — profile
773        // preferences are biases, not hard constraints.
774        let filtered: Option<Vec<&'static str>> = allowed.map(|allow| {
775            base_pool
776                .iter()
777                .copied()
778                .filter(|c| allow.contains(c))
779                .collect()
780        });
781        let after_allowed: &[&'static str] = match &filtered {
782            Some(v) if !v.is_empty() => v.as_slice(),
783            _ => base_pool,
784        };
785
786        // Apply the strict-forbidden filter (refine-pass blacklist) on
787        // top of `after_allowed`. Empty post-forbidden pool → no
788        // connective emitted (None). This is the intentional asymmetry
789        // with `allowed`: blacklist is a hard constraint.
790        let strictly_filtered: Option<Vec<&'static str>> = forbidden.map(|forbid| {
791            after_allowed
792                .iter()
793                .copied()
794                .filter(|c| !forbid.contains(c))
795                .collect()
796        });
797        let pool_owned: Vec<&'static str>;
798        let pool: &[&'static str] = match &strictly_filtered {
799            Some(v) => {
800                if v.is_empty() {
801                    self.record_family_slot(None);
802                    return None;
803                }
804                pool_owned = v.clone();
805                pool_owned.as_slice()
806            }
807            None => after_allowed,
808        };
809
810        // Family-budget gate: count this family's emissions inside the
811        // trailing window. Once they saturate the (effective) pool,
812        // suppress the connective so the prose continues without a
813        // transition cue.
814        let family_count = self
815            .connective_family_history
816            .iter()
817            .rev()
818            .take(FAMILY_WINDOW)
819            .filter(|slot| **slot == Some(family))
820            .count();
821        if family_count >= pool.len() {
822            self.record_family_slot(None);
823            return None;
824        }
825
826        let immediate = self.connective_history.back().map(String::as_str);
827        let two_back = self
828            .connective_history
829            .iter()
830            .rev()
831            .nth(1)
832            .map(String::as_str);
833
834        let prefer_bonus = |connective: &str| -> i64 {
835            let Some(prefs) = preferred else {
836                return 0;
837            };
838            prefs
839                .iter()
840                .find_map(|(s, w)| if *s == connective { Some(*w) } else { None })
841                .map(|w| (w * 10.0) as i64)
842                .unwrap_or(0)
843        };
844
845        let mut selected: Option<&'static str> = None;
846        let mut selected_score: i64 = i64::MIN;
847
848        for &connective in pool {
849            if pool.len() > 1 && immediate == Some(connective) {
850                continue;
851            }
852
853            let distance = self
854                .connective_history
855                .iter()
856                .rev()
857                .position(|history| history == connective)
858                .unwrap_or(CONNECTIVE_WINDOW + 1) as i64;
859
860            let alternation_penalty = if pool.len() > 1 && two_back == Some(connective) {
861                ALTERNATION_PENALTY
862            } else {
863                0
864            };
865            let score = distance - alternation_penalty + prefer_bonus(connective);
866
867            if selected.is_none() || score > selected_score {
868                selected = Some(connective);
869                selected_score = score;
870            }
871        }
872
873        let connective = selected?;
874        self.connective_history.push_back(connective.to_string());
875        if self.connective_history.len() > CONNECTIVE_WINDOW {
876            self.connective_history.pop_front();
877        }
878        self.record_family_slot(Some(family));
879
880        Some(connective)
881    }
882
883    /// Push a per-decision family slot, capping the ring buffer at
884    /// `FAMILY_WINDOW + 2` so the budget check has the full window plus
885    /// a small lookahead margin without growing without bound.
886    fn record_family_slot(&mut self, slot: Option<ConnectorFamily>) {
887        self.connective_family_history.push_back(slot);
888        if self.connective_family_history.len() > FAMILY_WINDOW + 2 {
889            self.connective_family_history.pop_front();
890        }
891    }
892
893    /// Record the words from a rendered output for repetition scoring.
894    pub fn record_output_words(&mut self, output: &str) {
895        let mut ids: HashSet<u32> = new_set();
896        for raw in output.split_whitespace() {
897            let w = raw
898                .trim_matches(|c: char| !c.is_alphanumeric())
899                .to_lowercase();
900            if w.len() <= 2 {
901                continue;
902            }
903            let id = self.interner.intern(&w);
904            if self.stopword_ids.contains(&id) {
905                continue;
906            }
907            ids.insert(id);
908        }
909
910        self.word_history.push_back((self.render_index, ids));
911
912        // Trim to window
913        while self.word_history.len() > WORD_HISTORY_WINDOW {
914            self.word_history.pop_front();
915        }
916    }
917
918    /// Iterate over the recent sentence-length history (newest last).
919    /// Each value is the word count of one emitted sentence inside the
920    /// rhythm-tracking window. Exposed for profile-aware scorers that
921    /// need to read the cadence buffer without snapshotting the whole
922    /// session — the buffer is short and read-only from outside.
923    pub fn sentence_length_iter(&self) -> impl Iterator<Item = usize> + '_ {
924        self.sentence_length_history.iter().copied()
925    }
926
927    /// Record word counts for the sentences emitted by the committed render.
928    pub fn record_sentence_rhythm(&mut self, output: &str) {
929        for len in sentence_word_counts(output) {
930            self.sentence_length_history.push_back(len);
931            while self.sentence_length_history.len() > SENTENCE_RHYTHM_WINDOW {
932                self.sentence_length_history.pop_front();
933            }
934        }
935    }
936
937    /// Score a candidate output for repetition against recent history.
938    /// Lower score = less repetition = better.
939    pub fn repetition_score(&self, candidate: &str) -> f64 {
940        // Collect candidate word ids; new words may not be in the interner
941        // yet, so use `get` (read-only) and skip unknowns — they have no
942        // history so they contribute zero to the score.
943        let candidate_ids: HashSet<u32> = candidate
944            .split_whitespace()
945            .filter_map(|raw| {
946                let w = raw
947                    .trim_matches(|c: char| !c.is_alphanumeric())
948                    .to_lowercase();
949                if w.len() <= 2 {
950                    return None;
951                }
952                let id = self.interner.get(&w)?;
953                if self.stopword_ids.contains(&id) {
954                    return None;
955                }
956                Some(id)
957            })
958            .collect();
959
960        let mut score = 0.0;
961        for (idx, ids) in &self.word_history {
962            let distance = self.render_index.saturating_sub(*idx);
963            let overlap = candidate_ids.intersection(ids).count();
964            // Closer renders penalized more heavily
965            let weight = match distance {
966                0 | 1 => 3.0,
967                2 => 2.0,
968                3 => 1.0,
969                _ => 0.5,
970            };
971            score += overlap as f64 * weight;
972        }
973        score
974    }
975
976    /// Score a candidate output against recent sentence-length cadence.
977    /// Lower is better: candidates with sentence lengths that were just
978    /// emitted receive a penalty, while noticeably shorter or longer variants
979    /// are preferred when repetition scores are otherwise close.
980    ///
981    /// In addition to the per-sentence closeness/mean components, a bounded
982    /// same-side penalty fires for each consecutive sentence pair (history
983    /// → candidate, then candidate → candidate) that lands on the same side
984    /// of the running mean. This nudges the selector toward burst-pivot
985    /// cadence — alternating short/long around the mean — which is a hallmark
986    /// of natural prose. The penalty is purely additive and capped per
987    /// sentence so it cannot zero out repetition penalties or push the score
988    /// negative.
989    pub fn sentence_rhythm_score(&self, candidate: &str) -> f64 {
990        let candidate_lengths = sentence_word_counts(candidate);
991        if candidate_lengths.is_empty() || self.sentence_length_history.is_empty() {
992            return 0.0;
993        }
994
995        let recent_mean = self.sentence_length_history.iter().sum::<usize>() as f64
996            / self.sentence_length_history.len() as f64;
997
998        // Side of mean for the most recent emitted sentence, if any. Sentences
999        // exactly at the mean are treated as neutral (None) and never trigger
1000        // a same-side penalty in either direction.
1001        let mut prev_side = self
1002            .sentence_length_history
1003            .back()
1004            .and_then(|len| side_of_mean(*len as f64, recent_mean));
1005
1006        let mut score = 0.0;
1007        for len in &candidate_lengths {
1008            let closest = self
1009                .sentence_length_history
1010                .iter()
1011                .map(|recent| recent.abs_diff(*len))
1012                .min()
1013                .unwrap_or(usize::MAX);
1014
1015            score += match closest {
1016                0 => 3.0,
1017                1 => 2.0,
1018                2 => 1.0,
1019                3 => 0.5,
1020                _ => 0.0,
1021            };
1022
1023            let mean_delta = (*len as f64 - recent_mean).abs();
1024            if mean_delta < 1.0 {
1025                score += 1.0;
1026            } else if mean_delta < 2.0 {
1027                score += 0.5;
1028            }
1029
1030            let cur_side = side_of_mean(*len as f64, recent_mean);
1031            if let (Some(prev), Some(cur)) = (prev_side, cur_side)
1032                && prev == cur
1033            {
1034                score += SAME_SIDE_PENALTY;
1035            }
1036            // Carry candidate side forward so within-candidate runs (e.g.
1037            // long → long → long) accumulate the penalty across each pair,
1038            // not just against history.
1039            if cur_side.is_some() {
1040                prev_side = cur_side;
1041            }
1042        }
1043
1044        score / candidate_lengths.len() as f64
1045    }
1046
1047    /// Recency-weighted frequency of a specific word in recent output.
1048    /// Higher numbers mean the word has appeared recently and/or often.
1049    /// Used to pick the least-recently-used synonym from a registered
1050    /// group for elegant variation.
1051    pub fn word_frequency(&self, word: &str) -> f64 {
1052        let lower = word.to_lowercase();
1053        // Word must already be interned; if it has never appeared in history
1054        // its frequency is zero by definition.
1055        let id = match self.interner.get(&lower) {
1056            Some(id) => id,
1057            None => return 0.0,
1058        };
1059        let mut score = 0.0;
1060        for (idx, ids) in &self.word_history {
1061            if !ids.contains(&id) {
1062                continue;
1063            }
1064            let distance = self.render_index.saturating_sub(*idx);
1065            let weight = match distance {
1066                0 | 1 => 3.0,
1067                2 => 2.0,
1068                3 => 1.0,
1069                _ => 0.5,
1070            };
1071            score += weight;
1072        }
1073        score
1074    }
1075
1076    /// Select the next list style. Walks `LIST_STYLES` deterministically from
1077    /// `last_list_style` forward and returns the first style that is not in
1078    /// the recent-window (`recent_list_styles`). The walk advances past the
1079    /// chosen slot so subsequent calls progress through the palette rather
1080    /// than locking onto the first non-recent slot.
1081    ///
1082    /// Anti-repeat is fully deterministic — no RNG dependency — and ensures
1083    /// that an explicit forced style (e.g. `{|join:bracketed}` recorded via
1084    /// [`Self::record_list_style_used`]) does not collide with the very next
1085    /// auto-cycle pick. Falls back to the modulo slot if every style somehow
1086    /// sits in the recent window (unreachable while
1087    /// `LIST_STYLE_RECENT_WINDOW < LIST_STYLES.len()`, but kept defensive).
1088    pub fn next_list_style(&mut self) -> ListStyle {
1089        self.next_list_style_with_bias(None)
1090    }
1091
1092    /// Profile-aware variant of [`Self::next_list_style`].
1093    ///
1094    /// When `bias` is `Some(target)` and `target` is not currently inside
1095    /// the anti-repeat window, the cycle advances to the slot just past
1096    /// `target` and emits it. When `bias` is `None` (i.e., the profile's
1097    /// `ListStyleBias::Auto` default), or when the bias target is in the
1098    /// recent window, the natural cycle picks as in `next_list_style`.
1099    /// The bias is a preference, not an override — anti-repeat always wins.
1100    pub fn next_list_style_with_bias(&mut self, bias: Option<ListStyle>) -> ListStyle {
1101        if let Some(target) = bias
1102            && !self.recent_list_styles.contains(&target)
1103            && let Some(target_idx) = LIST_STYLES.iter().position(|s| *s == target)
1104        {
1105            // Advance the cycle to the slot just past the bias target so
1106            // the natural rotation continues coherently afterward, then
1107            // emit the target.
1108            self.last_list_style = target_idx.wrapping_add(1);
1109            self.push_recent_list_style(target);
1110            self.last_list_style_used = Some(target);
1111            return target;
1112        }
1113
1114        let len = LIST_STYLES.len();
1115        let start = self.last_list_style % len;
1116
1117        let mut chosen_offset = 0;
1118        for offset in 0..len {
1119            let candidate = LIST_STYLES[(start + offset) % len];
1120            if !self.recent_list_styles.contains(&candidate) {
1121                chosen_offset = offset;
1122                break;
1123            }
1124        }
1125
1126        let style = LIST_STYLES[(start + chosen_offset) % len];
1127        // Advance past the picked slot so the cycle continues to make
1128        // forward progress rather than re-evaluating from the same start
1129        // on the next call.
1130        self.last_list_style = self.last_list_style.wrapping_add(chosen_offset + 1);
1131        self.push_recent_list_style(style);
1132        self.last_list_style_used = Some(style);
1133        style
1134    }
1135
1136    /// Record an explicit list style (e.g. `{|join:bracketed}`) for
1137    /// diagnostics AND anti-repeat. Forced styles count toward the recent
1138    /// window so a subsequent auto-cycle pick won't immediately repeat the
1139    /// forced phrasing.
1140    pub fn record_list_style_used(&mut self, style: ListStyle) {
1141        self.push_recent_list_style(style);
1142        self.last_list_style_used = Some(style);
1143    }
1144
1145    fn push_recent_list_style(&mut self, style: ListStyle) {
1146        // Drop duplicates of `style` already in the window before we push,
1147        // so the trailing slot is always "the most recent N *distinct*
1148        // styles" rather than the same forced style filling the buffer.
1149        self.recent_list_styles.retain(|&s| s != style);
1150        if self.recent_list_styles.len() == LIST_STYLE_RECENT_WINDOW {
1151            self.recent_list_styles.pop_front();
1152        }
1153        self.recent_list_styles.push_back(style);
1154    }
1155
1156    /// List style applied by the most recent render's `|join` pipe (if any).
1157    pub fn last_list_style_used(&self) -> Option<ListStyle> {
1158        self.last_list_style_used
1159    }
1160
1161    /// Push phantom entries onto `connective_history` AND
1162    /// `connective_family_history` so the next connective selection treats
1163    /// these as recently used by both the exact-cooldown rule and the
1164    /// family-budget gate. Each connective is mapped to its lexical family
1165    /// (Continuation / Similarity / Contrast) by membership in the
1166    /// per-relation pools; unknown strings push a `None` family slot so
1167    /// the budget gate is unaffected. Pushes are bounded by the same
1168    /// window caps the live emit path uses; phantom entries decay
1169    /// naturally as new emissions arrive. Used by the retrospective
1170    /// refine pass to apply `PrimeRecencyWindow` constraints.
1171    pub(crate) fn prime_connective_history(&mut self, connectives: &[String]) {
1172        for c in connectives {
1173            self.connective_history.push_back(c.clone());
1174            if self.connective_history.len() > CONNECTIVE_WINDOW {
1175                self.connective_history.pop_front();
1176            }
1177            let family = family_for_connective(c.as_str());
1178            self.record_family_slot(family);
1179        }
1180    }
1181
1182    /// Push phantom entries onto `recent_list_styles` so the next
1183    /// auto-cycle pick treats these styles as recently used. Mirrors the
1184    /// dedup-and-cap semantics of [`Self::push_recent_list_style`]: an
1185    /// already-recent style is moved to the trailing slot rather than
1186    /// duplicated. Used by the retrospective refine pass to apply
1187    /// `PrimeRecencyWindow` constraints.
1188    pub(crate) fn prime_list_style_history(&mut self, list_styles: &[ListStyle]) {
1189        for &style in list_styles {
1190            self.push_recent_list_style(style);
1191        }
1192    }
1193
1194    /// Record whether Silent-mode cleanup stripped any trailing orphan words
1195    /// during the most recent render.
1196    pub fn set_cleanup_stripped_tail(&mut self, stripped: bool) {
1197        self.last_cleanup_stripped_tail = stripped;
1198    }
1199
1200    /// Whether the most recent render's cleanup pass removed trailing
1201    /// orphan words (Silent strictness only). `false` in other modes.
1202    pub fn last_cleanup_stripped_tail(&self) -> bool {
1203        self.last_cleanup_stripped_tail
1204    }
1205
1206    /// Whether this is the first render (no prior discourse context).
1207    pub fn is_first_render(&self) -> bool {
1208        self.render_index <= 1
1209    }
1210
1211    /// Whether a prior render happened in this discourse scope, used by
1212    /// the `{noun|demonstrative}` pipe to decide between "this X" and
1213    /// "the X". Cleared by `reset()`.
1214    pub fn has_prior_render(&self) -> bool {
1215        // begin_render has already bumped render_index for the current
1216        // render, so strictly greater than 1 means at least one earlier
1217        // render contributed to discourse state.
1218        self.render_index > 1
1219    }
1220
1221    /// Advance Cb tracking for the next render. Call this after all mutations
1222    /// from the current render (`mention_entity`, `record_output_words`) have
1223    /// completed and the render has committed. On render failure the
1224    /// `Session` snapshot/restore path will roll back `cb` and `previous_focus`
1225    /// along with all other fields via `Clone`.
1226    ///
1227    /// Called by `Engine::render_tx` at the end of each successful render.
1228    pub fn advance_cb(&mut self) {
1229        self.compute_cb_transition();
1230    }
1231
1232    /// The Centering Theory transition class from the most recent `advance_cb` call.
1233    /// Returns `Transition::NoCb` before any render or after a reset.
1234    pub fn last_transition(&self) -> Transition {
1235        self.last_transition
1236    }
1237
1238    /// The current backward-looking center, if any.
1239    pub fn cb(&self) -> Option<&str> {
1240        self.cb.as_deref()
1241    }
1242
1243    /// The forward-looking centers being built during the current render,
1244    /// ordered by rank ascending (Cp = first element).
1245    pub fn cf(&self) -> &[Cf] {
1246        &self.current_cf
1247    }
1248
1249    /// The forward-looking centers from the previous render.
1250    pub fn previous_cf(&self) -> &[Cf] {
1251        &self.previous_cf
1252    }
1253
1254    /// Compute and store the Cb for the **next** render, using Cf overlap to
1255    /// identify the backward-looking center as the highest-ranked entity in
1256    /// Cf(current) that also appeared in Cf(previous).
1257    ///
1258    /// When the pure Cf-overlap definition yields no shared entity, the method
1259    /// falls back to prior-focus logic to preserve backward compatibility with
1260    /// Rule 1 pronoun tests:
1261    ///
1262    /// - **No previous Cf** (first render / post-reset): Cb = Cp of current.
1263    /// - **No overlap, new entity first time**: prior focus stays as Cb
1264    ///   (Smooth Shift — introduce gently, keep prior thread alive).
1265    /// - **No overlap, entity seen before**: Cb = current Cp (Retain-style).
1266    /// - **No current entity**: Cb carries prior focus forward.
1267    fn compute_cb_transition(&mut self) {
1268        // Cp of this render = first element of current_cf (lowest rank).
1269        let current_cp: Option<String> = self.current_cf.first().map(|c| c.name.clone());
1270        let prev_cb = self.cb.clone();
1271
1272        // New Cb: highest-ranked Cf member shared with the previous Cf.
1273        let new_cb: Option<String> = self
1274            .current_cf
1275            .iter()
1276            .find(|c| self.previous_cf.iter().any(|p| p.name == c.name))
1277            .map(|c| c.name.clone());
1278
1279        // Fallback when the Cf-overlap definition yields nothing.
1280        let new_cb = match (new_cb, current_cp.clone(), self.previous_focus.clone()) {
1281            // Overlap found: use it.
1282            (Some(cb), _, _) => Some(cb),
1283
1284            // First render (no previous focus yet): Cb = Cp.
1285            (None, Some(cp), None) => Some(cp),
1286
1287            // No overlap, but there is a previous focus.
1288            (None, Some(cp), Some(_)) => {
1289                if self.entities.get(&cp).is_some_and(|m| m.mention_count > 1) {
1290                    // Entity seen before: Retain — Cb shifts to newly-focused entity.
1291                    Some(cp)
1292                } else {
1293                    // Brand-new entity: Smooth Shift — prior focus stays as Cb.
1294                    self.previous_focus.clone()
1295                }
1296            }
1297
1298            // No current entity: carry prior focus forward.
1299            (None, None, Some(p)) => Some(p),
1300            (None, None, None) => None,
1301        };
1302
1303        // Classify the transition.
1304        let transition =
1305            classify_transition(new_cb.as_deref(), prev_cb.as_deref(), current_cp.as_deref());
1306
1307        self.cb = new_cb;
1308        self.last_transition = transition;
1309
1310        // Shift state forward for the next call.
1311        self.previous_focus = current_cp;
1312        self.previous_cf = core::mem::take(&mut self.current_cf);
1313    }
1314}
1315
1316/// Classify a Centering Theory transition given the new Cb, the previous Cb,
1317/// and the Cp (preferred center) of the current utterance.
1318///
1319/// Returns `NoCb` when:
1320/// - There is no current Cb (the utterance has no realized entities), or
1321/// - There is no previous Cb (first render or post-reset — no prior discourse
1322///   context exists to classify a transition against).
1323fn classify_transition(cb: Option<&str>, prev_cb: Option<&str>, cp: Option<&str>) -> Transition {
1324    let cb = match cb {
1325        Some(c) => c,
1326        None => return Transition::NoCb,
1327    };
1328    // No prior Cb → no meaningful transition (first render or post-reset).
1329    let prev_cb = match prev_cb {
1330        Some(p) => p,
1331        None => return Transition::NoCb,
1332    };
1333    let cb_eq_prev = prev_cb == cb;
1334    let cb_eq_cp = matches!(cp, Some(c) if c == cb);
1335
1336    match (cb_eq_prev, cb_eq_cp) {
1337        (true, true) => Transition::Continue,
1338        (true, false) => Transition::Retain,
1339        (false, true) => Transition::SmoothShift,
1340        (false, false) => Transition::RoughShift,
1341    }
1342}
1343
1344impl Default for DiscourseState {
1345    fn default() -> Self {
1346        Self::new()
1347    }
1348}
1349
/// Split `text` into sentences and return the word count of each.
///
/// Tokens are separated by whitespace. A token counts as a word when it
/// contains at least one alphanumeric character. A token whose last
/// character is `.`, `!`, or `?` closes the current sentence, provided the
/// sentence already holds at least one word. Words left over after the
/// final terminator form one last sentence.
pub(crate) fn sentence_word_counts(text: &str) -> Vec<usize> {
    let mut per_sentence = Vec::new();
    let mut words_in_sentence = 0usize;

    for token in text.split_whitespace() {
        if token.chars().any(char::is_alphanumeric) {
            words_in_sentence += 1;
        }

        let closes_sentence = matches!(token.chars().next_back(), Some('.' | '!' | '?'));
        if closes_sentence && words_in_sentence > 0 {
            per_sentence.push(words_in_sentence);
            words_in_sentence = 0;
        }
    }

    // Trailing words without a terminator still count as a sentence.
    if words_in_sentence > 0 {
        per_sentence.push(words_in_sentence);
    }

    per_sentence
}
1371
/// Whether two template keys denote the same action. Keys match only when
/// they are identical as whole strings:
/// "code.renamed" vs "code.renamed" → true,
/// "code.renamed" vs "code.deleted" → false.
fn keys_share_action(a: &str, b: &str) -> bool {
    a.as_bytes() == b.as_bytes()
}
1378
/// Whether two template keys name contrasting actions.
///
/// Only the segment after the last `.` of each key is compared (the whole
/// key when it has no `.`). The contrasting pairs are "added" vs "deleted"
/// and "added" vs "removed", in either order.
fn keys_contrast(a: &str, b: &str) -> bool {
    let action_of = |key: &'_ str| -> String {
        match key.rsplit_once('.') {
            Some((_, tail)) => tail.to_string(),
            None => key.to_string(),
        }
    };
    let (a_action, b_action) = (action_of(a), action_of(b));

    matches!(
        (a_action.as_str(), b_action.as_str()),
        ("added", "deleted" | "removed") | ("deleted" | "removed", "added")
    )
}
1389
1390#[cfg(test)]
1391mod tests {
1392    use super::*;
1393
1394    #[test]
1395    fn first_mention_is_full() {
1396        let state = DiscourseState::new();
1397        assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1398    }
1399
1400    #[test]
1401    fn second_mention_is_pronoun_when_focused() {
1402        let mut state = DiscourseState::new();
1403        state.begin_render();
1404        state.mention_entity("UserService", "class");
1405
1406        state.begin_render();
1407        assert_eq!(state.reference_form("UserService"), ReferenceForm::Pronoun);
1408    }
1409
1410    #[test]
1411    fn ambiguity_prevents_pronoun() {
1412        let mut state = DiscourseState::new();
1413        state.begin_render();
1414        state.mention_entity("UserService", "class");
1415        state.mention_entity("AuthService", "class");
1416
1417        state.begin_render();
1418        // Both were mentioned recently — ambiguous, use short name
1419        assert_eq!(
1420            state.reference_form("UserService"),
1421            ReferenceForm::ShortName
1422        );
1423    }
1424
1425    #[test]
1426    fn distant_mention_reintroduces_full() {
1427        let mut state = DiscourseState::new();
1428        state.begin_render();
1429        state.mention_entity("UserService", "class");
1430
1431        // Advance several renders without mentioning it
1432        state.begin_render();
1433        state.begin_render();
1434        state.begin_render();
1435
1436        assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1437    }
1438
1439    #[test]
1440    fn reset_clears_all_state() {
1441        let mut state = DiscourseState::new();
1442        state.begin_render();
1443        state.mention_entity("UserService", "class");
1444        state.record_template_choice("code.renamed", 0);
1445
1446        state.reset();
1447
1448        assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1449        assert_eq!(state.last_template_variant("code.renamed"), None);
1450        assert!(state.is_first_render());
1451    }
1452
1453    #[test]
1454    fn template_history_tracks_last_variant() {
1455        let mut state = DiscourseState::new();
1456        state.record_template_choice("code.renamed", 2);
1457        assert_eq!(state.last_template_variant("code.renamed"), Some(2));
1458    }
1459
1460    #[test]
1461    fn connective_avoids_repetition() {
1462        let mut state = DiscourseState::new();
1463        let rel = DiscourseRelation::SameEntityDifferentAction;
1464
1465        let c1 = state.select_connective(&rel).unwrap();
1466        let c2 = state.select_connective(&rel).unwrap();
1467        let c3 = state.select_connective(&rel).unwrap();
1468
1469        assert_ne!(c1, c2);
1470        assert_ne!(c2, c3);
1471        assert_ne!(c1, c3);
1472    }
1473
1474    #[test]
1475    fn connective_recency_window_spans_mixed_relation_types() {
1476        let mut state = DiscourseState::new();
1477        let same_entity = DiscourseRelation::SameEntityDifferentAction;
1478        let same_action = DiscourseRelation::DifferentEntitySameAction;
1479        let contrast = DiscourseRelation::Contrast;
1480
1481        assert_eq!(state.select_connective(&same_entity), Some("Additionally,"));
1482        assert_eq!(state.select_connective(&contrast), Some("Meanwhile,"));
1483        assert_eq!(state.select_connective(&same_action), Some("Similarly,"));
1484        assert_eq!(state.select_connective(&contrast), Some("However,"));
1485
1486        // "Additionally," is still inside the six-entry recency window, so
1487        // the selector moves to the next unused same-entity connective.
1488        assert_eq!(state.select_connective(&same_entity), Some("Furthermore,"));
1489    }
1490
1491    #[test]
1492    fn connective_family_budget_drops_to_null_when_pool_saturates() {
1493        let mut state = DiscourseState::new();
1494        let rel = DiscourseRelation::SameEntityDifferentAction;
1495
1496        // Three-element continuation pool drains uniquely.
1497        assert_eq!(state.select_connective(&rel), Some("Additionally,"));
1498        assert_eq!(state.select_connective(&rel), Some("Furthermore,"));
1499        assert_eq!(state.select_connective(&rel), Some("It also"));
1500
1501        // Saturation: rather than recycling the LRU choice and producing
1502        // an Additionally,/Furthermore,/It also,/Additionally,... cycle,
1503        // the family budget suppresses the next emissions so the prose
1504        // dissolves into plain follow-on sentences.
1505        assert_eq!(state.select_connective(&rel), None);
1506        assert_eq!(state.select_connective(&rel), None);
1507        assert_eq!(state.select_connective(&rel), None);
1508
1509        // Once enough null slots accumulate inside the trailing window,
1510        // the budget reopens and the LRU pick resumes — Additionally
1511        // is the oldest emitted connector in `connective_history`.
1512        assert_eq!(state.select_connective(&rel), Some("Additionally,"));
1513    }
1514
1515    /// Regression for the service-shape prose Matt flagged: five follow-on
1516    /// sentences that all trigger DifferentEntitySameAction must NOT
1517    /// produce a `Similarly,/Likewise,/Similarly,/Likewise,/Similarly,`
1518    /// alternation. The two-element similarity pool can sustain at most
1519    /// two emissions inside the family window before the budget forces
1520    /// nulls so the pattern dissolves.
1521    #[test]
1522    fn similarity_family_budget_breaks_service_shape_alternation() {
1523        let mut state = DiscourseState::new();
1524        let rel = DiscourseRelation::DifferentEntitySameAction;
1525
1526        let emissions: Vec<Option<&'static str>> =
1527            (0..5).map(|_| state.select_connective(&rel)).collect();
1528
1529        let connectors: Vec<&'static str> = emissions.iter().filter_map(|e| *e).collect();
1530
1531        assert!(
1532            connectors.len() <= 2,
1533            "expected at most two similarity-family connectives across five \
1534             follow-on sentences, got {emissions:?}"
1535        );
1536
1537        // No A/B/A pattern: the third emission (if any) must not match
1538        // the connective two slots earlier.
1539        for window in emissions.windows(3) {
1540            if let (Some(a), Some(_), Some(c)) = (window[0], window[1], window[2]) {
1541                assert_ne!(
1542                    a, c,
1543                    "A/B/A alternation slipped through the budget: {emissions:?}"
1544                );
1545            }
1546        }
1547
1548        // Both members of the pool should appear at most once in the
1549        // surfaced connector list — the budget caps usage at pool.len()
1550        // = 2 distinct connectives, never two of the same.
1551        let similarly = connectors.iter().filter(|c| **c == "Similarly,").count();
1552        let likewise = connectors.iter().filter(|c| **c == "Likewise,").count();
1553        assert!(
1554            similarly <= 1 && likewise <= 1,
1555            "no similarity connector should repeat inside the family window: {emissions:?}"
1556        );
1557    }
1558
1559    #[test]
1560    fn no_connective_for_none_relation() {
1561        let mut state = DiscourseState::new();
1562        assert!(state.select_connective(&DiscourseRelation::None).is_none());
1563    }
1564
1565    /// Regression: repeated entity-less templates must not be classified
1566    /// as DifferentEntitySameAction — that yields spurious "Similarly,"
1567    /// connectives where no entity comparison is meaningful.
1568    #[test]
1569    fn entity_less_repeated_render_produces_no_relation() {
1570        let mut state = DiscourseState::new();
1571        state.begin_render();
1572        state.last_template_key = Some("code.added".to_string());
1573        state.last_entity_name = None;
1574
1575        assert_eq!(
1576            state.detect_relation("code.added", None),
1577            DiscourseRelation::None
1578        );
1579    }
1580
1581    /// Regression: only one side having an entity is also insufficient to
1582    /// infer either same-entity or different-entity relationships.
1583    #[test]
1584    fn one_sided_entity_presence_produces_no_relation() {
1585        let mut state = DiscourseState::new();
1586        state.begin_render();
1587        state.last_template_key = Some("t".to_string());
1588        state.last_entity_name = Some("Foo".to_string());
1589
1590        assert_eq!(state.detect_relation("t", None), DiscourseRelation::None);
1591    }
1592
1593    #[test]
1594    fn detect_same_entity_different_action() {
1595        let mut state = DiscourseState::new();
1596        state.begin_render();
1597        state.last_template_key = Some("code.renamed".to_string());
1598        state.last_entity_name = Some("Foo".to_string());
1599
1600        assert_eq!(
1601            state.detect_relation("code.deleted", Some("Foo")),
1602            DiscourseRelation::SameEntityDifferentAction
1603        );
1604    }
1605
1606    #[test]
1607    fn detect_different_entity_same_action() {
1608        let mut state = DiscourseState::new();
1609        state.begin_render();
1610        state.last_template_key = Some("code.renamed".to_string());
1611        state.last_entity_name = Some("Foo".to_string());
1612
1613        assert_eq!(
1614            state.detect_relation("code.renamed", Some("Bar")),
1615            DiscourseRelation::DifferentEntitySameAction
1616        );
1617    }
1618
1619    #[test]
1620    fn detect_contrast() {
1621        let mut state = DiscourseState::new();
1622        state.begin_render();
1623        state.last_template_key = Some("code.added".to_string());
1624        state.last_entity_name = Some("Foo".to_string());
1625
1626        assert_eq!(
1627            state.detect_relation("code.deleted", Some("Bar")),
1628            DiscourseRelation::Contrast
1629        );
1630    }
1631
1632    #[test]
1633    fn repetition_score_penalizes_recent_overlap() {
1634        let mut state = DiscourseState::new();
1635        state.begin_render();
1636        state.record_output_words("The class UserService was renamed to AccountService");
1637
1638        state.begin_render();
1639        let score_high =
1640            state.repetition_score("The class UserService was modified affecting AccountService");
1641        let score_low = state.repetition_score("AuthGuard removed from the application entirely");
1642
1643        assert!(score_high > score_low);
1644    }
1645
1646    #[test]
1647    fn sentence_rhythm_score_penalizes_recent_sentence_lengths() {
1648        let mut state = DiscourseState::new();
1649        state.record_sentence_rhythm("Alpha changed after validation passed.");
1650
1651        let repeated_cadence = state.sentence_rhythm_score("Beta changed after review passed");
1652        let varied_cadence =
1653            state.sentence_rhythm_score("Beta changed after review passed and deployment resumed");
1654
1655        assert!(
1656            repeated_cadence > varied_cadence,
1657            "same-length candidates should score worse than varied ones"
1658        );
1659    }
1660
1661    #[test]
1662    fn sentence_rhythm_score_penalizes_same_side_runs() {
1663        // History: three short sentences (3, 4, 3 words). Mean = 3.33.
1664        // Last emitted sentence (3 words) is below mean.
1665        //
1666        // Pivoting candidate: a single noticeably-long sentence (above mean)
1667        // — flips side relative to history's last entry, no same-side
1668        // penalty fires.
1669        //
1670        // Same-side candidate: another short sentence (below mean) — same
1671        // side as history's last entry, so the burst-pivot penalty fires.
1672        //
1673        // The same-side candidate's closeness/mean-delta cost is also
1674        // higher (it sits inside the recent cluster), but the penalty must
1675        // strictly increase the gap, not flip its sign. Both effects push
1676        // the score in the same direction; the assertion proves the
1677        // additive penalty is observable on top of the existing terms.
1678        let mut state = DiscourseState::new();
1679        state.record_sentence_rhythm("Alpha shipped today. Beta paused. Gamma shipped.");
1680
1681        let pivoting = state.sentence_rhythm_score(
1682            "Delta shipped after the schema migration finished and the staging build went green",
1683        );
1684        let same_side = state.sentence_rhythm_score("Delta shipped today");
1685
1686        assert!(
1687            same_side > pivoting,
1688            "same-side candidate ({same_side}) must score worse than pivoting candidate ({pivoting})"
1689        );
1690    }
1691
1692    #[test]
1693    fn sentence_rhythm_score_pivot_penalty_does_not_dominate_repetition() {
1694        // Construct two candidates where the same-side candidate is
1695        // otherwise repetition-clean and the pivoting candidate reuses the
1696        // entire prior render's vocabulary. The discourse score the engine
1697        // actually compares is repetition + rhythm; this test pins down
1698        // that the rhythm penalty cannot flip the verdict on its own — the
1699        // repetition signal must still dominate.
1700        let mut state = DiscourseState::new();
1701        state.begin_render();
1702        state.record_output_words("AuthService validated tokens against the registry");
1703        state.record_sentence_rhythm("AuthService validated tokens against the registry.");
1704
1705        state.begin_render();
1706        // Pivoting candidate sits on the opposite side of the running mean
1707        // (much longer) but reuses every distinctive word from the prior
1708        // render — heavy repetition.
1709        let pivoting_repeats = "AuthService validated tokens against the registry yet again";
1710        // Same-side candidate matches the prior cadence (same length) but
1711        // introduces wholly new vocabulary — minimal repetition.
1712        let same_side_clean = "PaymentGateway settled invoices nightly";
1713
1714        let rep_pivot = state.repetition_score(pivoting_repeats);
1715        let rep_clean = state.repetition_score(same_side_clean);
1716        let rhy_pivot = state.sentence_rhythm_score(pivoting_repeats);
1717        let rhy_clean = state.sentence_rhythm_score(same_side_clean);
1718
1719        assert!(
1720            (rep_pivot + rhy_pivot) > (rep_clean + rhy_clean),
1721            "repetition-heavy pivoting candidate ({}) must still score worse \
1722             than the repetition-clean same-side candidate ({}); the burst-pivot \
1723             penalty is a tie-breaker, not a faithfulness override",
1724            rep_pivot + rhy_pivot,
1725            rep_clean + rhy_clean,
1726        );
1727        // And the rhythm-side delta alone must be smaller than the
1728        // repetition-side delta — proves the penalty is bounded relative
1729        // to the dominant constraint.
1730        assert!(
1731            (rep_pivot - rep_clean).abs() > (rhy_clean - rhy_pivot).abs(),
1732            "repetition delta ({}) must dominate rhythm delta ({})",
1733            rep_pivot - rep_clean,
1734            rhy_clean - rhy_pivot,
1735        );
1736    }
1737
1738    #[test]
1739    fn sentence_rhythm_score_is_never_negative() {
1740        // The score is a sum of non-negative components divided by a
1741        // positive count. Sweep a handful of histories and candidates to
1742        // pin down the invariant — a future change that introduces a
1743        // reward (subtraction) must update this test deliberately.
1744        let mut state = DiscourseState::new();
1745        for prior in [
1746            "Alpha shipped.",
1747            "Beta paused after the long postmortem dragged on.",
1748            "Gamma. Delta. Epsilon shipped after lunch.",
1749        ] {
1750            state.record_sentence_rhythm(prior);
1751        }
1752
1753        for candidate in [
1754            "",
1755            "Zeta shipped.",
1756            "Zeta shipped after a careful review and a brief rollout window.",
1757            "Short. Long sentence with quite a few words inside it. Short again.",
1758        ] {
1759            let score = state.sentence_rhythm_score(candidate);
1760            assert!(
1761                score >= 0.0,
1762                "rhythm score must be non-negative (candidate `{candidate}`, score {score})"
1763            );
1764        }
1765    }
1766
1767    #[test]
1768    fn sentence_rhythm_history_is_bounded() {
1769        let mut state = DiscourseState::new();
1770        state.record_sentence_rhythm(
1771            "One changed. Two changed. Three changed. Four changed. Five changed. Six changed. Seven changed.",
1772        );
1773
1774        assert_eq!(state.sentence_length_history.len(), SENTENCE_RHYTHM_WINDOW);
1775    }
1776
1777    // --- Cb tracking tests (Phase 1) ---
1778
1779    #[test]
1780    fn cb_none_before_first_render() {
1781        let state = DiscourseState::new();
1782        assert_eq!(state.cb, None);
1783    }
1784
1785    #[test]
1786    fn cb_becomes_focus_after_first_render() {
1787        let mut state = DiscourseState::new();
1788        state.begin_render();
1789        state.mention_entity("Foo", "class");
1790        state.advance_cb();
1791        assert_eq!(state.cb.as_deref(), Some("Foo"));
1792    }
1793
1794    #[test]
1795    fn cb_stays_on_continue_transition() {
1796        let mut state = DiscourseState::new();
1797        state.begin_render();
1798        state.mention_entity("Foo", "class");
1799        state.advance_cb();
1800        state.begin_render();
1801        state.mention_entity("Foo", "class");
1802        state.advance_cb();
1803        assert_eq!(state.cb.as_deref(), Some("Foo"));
1804    }
1805
1806    #[test]
1807    fn cb_shifts_to_prior_focus_on_new_entity_intro() {
1808        // Render 1: Foo → Cb becomes Foo (first render, no prev).
1809        // Render 2: Bar (new entity, mention_count == 1 so Smooth Shift) → Cb stays Foo.
1810        let mut state = DiscourseState::new();
1811        state.begin_render();
1812        state.mention_entity("Foo", "class");
1813        state.advance_cb();
1814        state.begin_render();
1815        state.mention_entity("Bar", "class");
1816        state.advance_cb();
1817        assert_eq!(state.cb.as_deref(), Some("Foo"));
1818    }
1819
1820    #[test]
1821    fn cb_shifts_to_current_on_retain() {
1822        // Render 1: Foo
1823        // Render 2: Foo (continue)
1824        // Render 3: Foo (continue)
1825        // Render 4: Bar (new entity; Smooth Shift → Cb=Foo)
1826        // Render 5: Foo (re-focus on previously-seen entity; Retain → Cb=Foo)
1827        let mut state = DiscourseState::new();
1828        for name in ["Foo", "Foo", "Foo", "Bar", "Foo"] {
1829            state.begin_render();
1830            state.mention_entity(name, "class");
1831            state.advance_cb();
1832        }
1833        // Foo has mention_count >= 2 by render 5 → Retain → Cb=Foo
1834        assert_eq!(state.cb.as_deref(), Some("Foo"));
1835    }
1836
1837    #[test]
1838    fn cb_reset_clears_state() {
1839        let mut state = DiscourseState::new();
1840        state.begin_render();
1841        state.mention_entity("Foo", "class");
1842        state.advance_cb();
1843        state.reset();
1844        assert_eq!(state.cb, None);
1845        assert_eq!(state.previous_focus, None);
1846    }
1847
1848    #[test]
1849    fn reference_form_all_variants_distinct() {
1850        // Sanity: ensure the new variants are distinguishable.
1851        assert_ne!(ReferenceForm::Full, ReferenceForm::Zero);
1852        assert_ne!(ReferenceForm::Pronoun, ReferenceForm::Demonstrative);
1853        assert_ne!(ReferenceForm::Pronoun, ReferenceForm::Possessive);
1854        assert_ne!(ReferenceForm::Possessive, ReferenceForm::ShortName);
1855        assert_ne!(ReferenceForm::Zero, ReferenceForm::Demonstrative);
1856    }
1857
1858    #[test]
1859    fn list_style_cycles() {
1860        let mut state = DiscourseState::new();
1861        let s1 = state.next_list_style();
1862        let s2 = state.next_list_style();
1863        let s3 = state.next_list_style();
1864        let s4 = state.next_list_style();
1865
1866        // The first four picks still match the original order so existing
1867        // golden tests (e.g. document_render_preserves_list_style_cycle_across_paragraphs)
1868        // remain stable.
1869        assert_eq!(s1, ListStyle::Including);
1870        assert_eq!(s2, ListStyle::SuchAs);
1871        assert_eq!(s3, ListStyle::Dash);
1872        assert_eq!(s4, ListStyle::Bracketed);
1873    }
1874
1875    #[test]
1876    fn list_style_cycle_visits_every_variant_within_palette_length() {
1877        // Anti-repeat plus deterministic walk should still surface every
1878        // registered variant within LIST_STYLES.len() consecutive picks,
1879        // otherwise the palette has dead variants users never see.
1880        let mut state = DiscourseState::new();
1881        let mut seen: std::collections::HashSet<ListStyle> = std::collections::HashSet::new();
1882        for _ in 0..LIST_STYLES.len() {
1883            seen.insert(state.next_list_style());
1884        }
1885        assert_eq!(
1886            seen.len(),
1887            LIST_STYLES.len(),
1888            "anti-repeat cycle dropped a variant: visited {seen:?}"
1889        );
1890    }
1891
1892    #[test]
1893    fn list_style_anti_repeat_skips_recent_window() {
1894        // With LIST_STYLE_RECENT_WINDOW = 2, no style may repeat within 3
1895        // consecutive picks. Walk a long horizon and assert the invariant.
1896        let mut state = DiscourseState::new();
1897        let mut history: Vec<ListStyle> = Vec::new();
1898        for _ in 0..(LIST_STYLES.len() * 3) {
1899            let style = state.next_list_style();
1900            if history.len() >= LIST_STYLE_RECENT_WINDOW {
1901                let recent = &history[history.len() - LIST_STYLE_RECENT_WINDOW..];
1902                assert!(
1903                    !recent.contains(&style),
1904                    "style {style:?} repeated within recent window {recent:?} (history: {history:?})"
1905                );
1906            }
1907            history.push(style);
1908        }
1909    }
1910
1911    #[test]
1912    fn forced_list_style_blocks_immediate_auto_repeat() {
1913        // record_list_style_used pushes onto the same recent window as
1914        // next_list_style. After forcing Bracketed twice in a row, the
1915        // next auto pick must NOT be Bracketed — the original failure
1916        // mode was a pure-modulo cycle landing on the just-forced style.
1917        let mut state = DiscourseState::new();
1918        state.record_list_style_used(ListStyle::Bracketed);
1919        state.record_list_style_used(ListStyle::Bracketed);
1920
1921        let auto = state.next_list_style();
1922        assert_ne!(auto, ListStyle::Bracketed);
1923    }
1924
1925    #[test]
1926    fn forced_list_style_followed_by_auto_skips_window() {
1927        // If the template forces Including at the same point the auto-cycle
1928        // would have produced Including, the next auto pick must skip past
1929        // the forced style rather than emit it again.
1930        let mut state = DiscourseState::new();
1931        // Auto cycle starts at LIST_STYLES[0] = Including. Pre-empt with
1932        // a forced Including.
1933        state.record_list_style_used(ListStyle::Including);
1934        let auto = state.next_list_style();
1935        assert_ne!(auto, ListStyle::Including);
1936    }
1937
1938    #[test]
1939    fn reset_list_cycle_clears_recent_window_so_first_style_returns() {
1940        let mut state = DiscourseState::new();
1941        let _ = state.next_list_style();
1942        let _ = state.next_list_style();
1943        state.reset_list_cycle();
1944
1945        assert_eq!(state.next_list_style(), ListStyle::Including);
1946    }
1947
1948    // --- Paragraph-reset invariants (preserve narrative-level anti-repeat,
1949    // clear paragraph-local pronoun/centering state) ---
1950
1951    #[test]
1952    fn paragraph_reset_clears_focus_entity_so_no_pronoun_leak() {
1953        let mut state = DiscourseState::new();
1954        state.begin_render();
1955        state.mention_entity("UserService", "class");
1956
1957        state.reset_for_paragraph();
1958
1959        // Without an entity table or focus carryover, the next paragraph's
1960        // first reference must reintroduce the entity in full form rather
1961        // than pronominalize a stale focus from the prior paragraph.
1962        assert_eq!(state.reference_form("UserService"), ReferenceForm::Full);
1963        assert_eq!(state.focus_entity, None);
1964        assert!(!state.focus_is_plural);
1965    }
1966
1967    #[test]
1968    fn paragraph_reset_clears_centering_state() {
1969        let mut state = DiscourseState::new();
1970        state.begin_render();
1971        state.mention_entity_ranked("Foo", "class", 0);
1972        state.advance_cb();
1973        state.begin_render();
1974        state.mention_entity_ranked("Foo", "class", 0);
1975        state.advance_cb();
1976
1977        state.reset_for_paragraph();
1978
1979        assert_eq!(state.cb(), None);
1980        assert!(state.cf().is_empty());
1981        assert!(state.previous_cf().is_empty());
1982        assert_eq!(state.last_transition(), Transition::NoCb);
1983    }
1984
1985    #[test]
1986    fn paragraph_reset_suppresses_cross_paragraph_relation_inference() {
1987        let mut state = DiscourseState::new();
1988        state.begin_render();
1989        state.last_template_key = Some("code.added".to_string());
1990        state.last_entity_name = Some("Foo".to_string());
1991
1992        state.reset_for_paragraph();
1993
1994        // Same key + same entity in the next paragraph must not be classified
1995        // as `Contrast`/`SameEntityDifferentAction` — those would emit a
1996        // cross-paragraph "However,"/"Furthermore," that bridges over the
1997        // intentional paragraph break.
1998        assert_eq!(
1999            state.detect_relation("code.deleted", Some("Foo")),
2000            DiscourseRelation::None
2001        );
2002    }
2003
2004    #[test]
2005    fn paragraph_reset_preserves_template_variant_history() {
2006        let mut state = DiscourseState::new();
2007        state.record_template_choice("code.renamed", 2);
2008
2009        state.reset_for_paragraph();
2010
2011        // Anti-repeat must survive the paragraph break so the next paragraph
2012        // doesn't immediately replay the variant the prior paragraph just used.
2013        assert_eq!(state.last_template_variant("code.renamed"), Some(2));
2014    }
2015
2016    #[test]
2017    fn paragraph_reset_preserves_word_repetition_penalty() {
2018        let mut state = DiscourseState::new();
2019        state.begin_render();
2020        state.record_output_words("AuthGuard removed authentication entirely");
2021
2022        state.reset_for_paragraph();
2023
2024        // begin_render advances render_index for the next paragraph's first
2025        // utterance; the repetition score must still penalize words that
2026        // appeared in the prior paragraph.
2027        state.begin_render();
2028        let overlap_score = state.repetition_score("AuthGuard authentication entirely was removed");
2029        let unrelated_score = state.repetition_score("Telemetry pipeline rebuilt cleanly");
2030        assert!(
2031            overlap_score > unrelated_score,
2032            "expected overlap score {overlap_score} to exceed unrelated {unrelated_score}",
2033        );
2034        assert!(
2035            overlap_score > 0.0,
2036            "word_history must persist across paragraph reset"
2037        );
2038    }
2039
2040    #[test]
2041    fn paragraph_reset_preserves_render_index_so_demonstrative_continues() {
2042        let mut state = DiscourseState::new();
2043        // Simulate paragraph 1 with one event.
2044        state.begin_render();
2045        state.mention_entity("Foo", "class");
2046        state.advance_cb();
2047
2048        state.reset_for_paragraph();
2049
2050        // First render of paragraph 2.
2051        state.begin_render();
2052        // has_prior_render() drives `{noun|demonstrative}`'s "this X" vs
2053        // "the X" decision. Inside a single narrative, "this" remains correct
2054        // after the paragraph break — only a full session reset returns to
2055        // the introductory "the".
2056        assert!(state.has_prior_render());
2057        assert!(!state.is_first_render());
2058    }
2059
2060    #[test]
2061    fn paragraph_reset_preserves_list_style_cycle() {
2062        let mut state = DiscourseState::new();
2063        let first = state.next_list_style();
2064        let second_before = state.next_list_style();
2065
2066        state.reset_for_paragraph();
2067        let next_after_reset = state.next_list_style();
2068
2069        // Cycle must NOT restart at the first style after a paragraph break.
2070        assert_ne!(next_after_reset, first);
2071        assert_ne!(next_after_reset, second_before);
2072    }
2073
2074    #[test]
2075    fn full_reset_clears_anti_repeat_state() {
2076        // The full-narrative reset must still clear everything — anti-repeat
2077        // continuity belongs to a narrative, not to the session as a whole.
2078        let mut state = DiscourseState::new();
2079        state.begin_render();
2080        state.record_template_choice("k", 1);
2081        state.record_output_words("alpha beta gamma");
2082
2083        state.reset();
2084
2085        assert_eq!(state.last_template_variant("k"), None);
2086        // Newly-recorded non-overlapping words score zero against an empty
2087        // word_history.
2088        state.begin_render();
2089        assert_eq!(state.repetition_score("alpha beta gamma"), 0.0);
2090    }
2091
2092    // --- Cf and Transition tests (Phase 2 + Phase 3) ---
2093
2094    #[test]
2095    fn transition_no_cb_before_first_render() {
2096        let state = DiscourseState::new();
2097        assert_eq!(state.last_transition(), Transition::NoCb);
2098    }
2099
2100    #[test]
2101    fn transition_no_cb_when_no_entity() {
2102        let mut state = DiscourseState::new();
2103        state.begin_render();
2104        state.advance_cb();
2105        assert_eq!(state.last_transition(), Transition::NoCb);
2106    }
2107
2108    #[test]
2109    fn transition_nocb_after_first_mention() {
2110        // First render: no previous Cf exists, so no transition is meaningful.
2111        // prev_cb = None → classify_transition returns NoCb (no Cb to compare against prev).
2112        // But after the first render, cb is set to current entity.
2113        // The first advance_cb: new_cb = Some("Foo") (fallback: first render, no prev_focus).
2114        // prev_cb = None → classify_transition(Some("Foo"), None, Some("Foo"))
2115        //   → cb_eq_prev = false (prev is None), cb_eq_cp = true → SmoothShift.
2116        // But the plan says NoCb for the first render. The plan's test checks
2117        // last_transition == NoCb after render 1, which means we should return NoCb
2118        // when prev_cb is None (there's no prior Cb to continue from).
2119        let mut state = DiscourseState::new();
2120        state.begin_render();
2121        state.mention_entity("Foo", "class");
2122        state.advance_cb();
2123        assert_eq!(state.last_transition(), Transition::NoCb);
2124    }
2125
2126    #[test]
2127    fn transition_continue_same_entity_and_cp() {
2128        let mut state = DiscourseState::new();
2129        state.begin_render();
2130        state.mention_entity("Foo", "class");
2131        state.advance_cb();
2132        // First render → NoCb.
2133        assert_eq!(state.last_transition(), Transition::NoCb);
2134
2135        state.begin_render();
2136        state.mention_entity("Foo", "class");
2137        state.advance_cb();
2138        // Same entity again: Cb stays Foo, Cp is Foo → Continue.
2139        assert_eq!(state.last_transition(), Transition::Continue);
2140    }
2141
2142    #[test]
2143    fn transition_continue_when_cp_and_cb_both_same() {
2144        let mut state = DiscourseState::new();
2145        state.begin_render();
2146        state.mention_entity_ranked("Foo", "class", 0);
2147        state.advance_cb();
2148
2149        state.begin_render();
2150        state.mention_entity_ranked("Foo", "class", 0);
2151        state.advance_cb();
2152        assert_eq!(state.last_transition(), Transition::Continue);
2153    }
2154
2155    #[test]
2156    fn transition_retain_when_cb_same_but_cp_differs() {
2157        let mut state = DiscourseState::new();
2158        state.begin_render();
2159        state.mention_entity_ranked("Foo", "class", 0);
2160        state.advance_cb();
2161
2162        state.begin_render();
2163        // Foo still in Cf (rank 1 — object), but Cp is now Bar (rank 0 — subject).
2164        // Cb = Foo (only entity in common with previous Cf), Cp = Bar → Cb != Cp → Retain.
2165        state.mention_entity_ranked("Bar", "class", 0);
2166        state.mention_entity_ranked("Foo", "class", 1);
2167        state.advance_cb();
2168        assert_eq!(state.last_transition(), Transition::Retain);
2169    }
2170
2171    #[test]
2172    fn transition_smooth_shift_new_entity() {
2173        let mut state = DiscourseState::new();
2174        state.begin_render();
2175        state.mention_entity("Foo", "class");
2176        state.advance_cb();
2177
2178        state.begin_render();
2179        state.mention_entity("Bar", "class");
2180        state.advance_cb();
2181        // New entity, no overlap with previous Cf → fallback: Bar seen for first time
2182        // → previous_focus stays as Cb. Cp = Bar, Cb = Foo (prev focus).
2183        // prev_cb was Foo; new_cb = Foo; prev_cb == new_cb true; new_cb == Cp false → Retain.
2184        // OR: if Bar is brand-new and no overlap, fallback gives new_cb = previous_focus = Foo.
2185        // Then: cb_eq_prev = (Foo == Foo) = true, cb_eq_cp = (Foo == Bar) = false → Retain.
2186        // But the plan says SmoothShift. The plan's test is at Phase 1 before full Cf is wired.
2187        // With full Cf: previous_cf = [{Foo,0}], current_cf = [{Bar,0}]. No overlap.
2188        // Bar is brand-new (mention_count == 1 after this render but the check uses > 1).
2189        // So fallback: previous_focus (= Foo) → new_cb = Foo.
2190        // classify_transition(Some("Foo"), Some("Foo"), Some("Bar"))
2191        //   → cb_eq_prev = true, cb_eq_cp = false → Retain.
2192        // The plan's Phase 1 test was drafted without full Cf; with Cf it's Retain.
2193        // We verify the correct Cf-based result: Retain.
2194        assert_eq!(state.last_transition(), Transition::Retain);
2195    }
2196
2197    #[test]
2198    fn transition_smooth_shift_new_cb_equals_cp() {
2199        // True Smooth Shift: Cb changes AND Cb == Cp.
2200        // We need overlap between current and previous Cf where the new Cb != prev Cb.
2201        // u1: Foo (rank 0). Cb = Foo (first render, NoCb transition).
2202        // u2: Bar (rank 0), Foo (rank 1). Cf overlap = {Foo}. Cb = Foo.
2203        //   prev_cb = Foo; new_cb = Foo; cb_eq_prev = true; cb_eq_cp = (Foo==Bar)=false → Retain.
2204        // To get SmoothShift we need new_cb != prev_cb AND new_cb == cp.
2205        // u1: Foo. u2: Bar + Foo (Cb=Foo, prev_cb=Foo → Retain).
2206        // u3: Bar (rank 0 only). Cf={Bar}. Overlap with u2 Cf={Bar,Foo}: Bar is in both.
2207        //   new_cb = Bar. prev_cb = Foo. cp = Bar.
2208        //   cb_eq_prev = (Bar==Foo) = false; cb_eq_cp = (Bar==Bar) = true → SmoothShift.
2209        let mut state = DiscourseState::new();
2210        state.begin_render();
2211        state.mention_entity_ranked("Foo", "class", 0);
2212        state.advance_cb();
2213
2214        state.begin_render();
2215        state.mention_entity_ranked("Bar", "class", 0);
2216        state.mention_entity_ranked("Foo", "class", 1);
2217        state.advance_cb();
2218        assert_eq!(state.last_transition(), Transition::Retain);
2219
2220        state.begin_render();
2221        state.mention_entity_ranked("Bar", "class", 0);
2222        state.advance_cb();
2223        assert_eq!(state.last_transition(), Transition::SmoothShift);
2224    }
2225
2226    #[test]
2227    fn transition_rough_shift_proper() {
2228        let mut state = DiscourseState::new();
2229        // u1: focus Foo. Cb = Foo. Cp = Foo. → NoCb (first render).
2230        state.begin_render();
2231        state.mention_entity_ranked("Foo", "class", 0);
2232        state.advance_cb();
2233
2234        // u2: Bar (rank 0), Foo (rank 1).
2235        // Cf overlap with u1 Cf={Foo}: Foo is shared. Cb = Foo.
2236        // prev_cb = Foo, new_cb = Foo, cp = Bar.
2237        // cb_eq_prev = true, cb_eq_cp = false → Retain.
2238        state.begin_render();
2239        state.mention_entity_ranked("Bar", "class", 0);
2240        state.mention_entity_ranked("Foo", "class", 1);
2241        state.advance_cb();
2242        assert_eq!(state.last_transition(), Transition::Retain);
2243
2244        // u3: Baz (rank 0), Bar (rank 1).
2245        // Cf overlap with u2 Cf={Bar,Foo}: Bar is in current_cf. Cb = Bar.
2246        // prev_cb = Foo (from u1→u2 transition), cp = Baz.
2247        // cb_eq_prev = (Bar==Foo) = false, cb_eq_cp = (Bar==Baz) = false → RoughShift.
2248        state.begin_render();
2249        state.mention_entity_ranked("Baz", "class", 0);
2250        state.mention_entity_ranked("Bar", "class", 1);
2251        state.advance_cb();
2252        assert_eq!(state.last_transition(), Transition::RoughShift);
2253    }
2254
2255    #[test]
2256    fn cf_deduplicates_by_name_keeping_lower_rank() {
2257        let mut state = DiscourseState::new();
2258        state.begin_render();
2259        state.mention_entity_ranked("Foo", "class", 2);
2260        state.mention_entity_ranked("Foo", "class", 0);
2261        let cf = state.cf();
2262        assert_eq!(cf.len(), 1);
2263        assert_eq!(cf[0].rank, 0);
2264    }
2265
2266    #[test]
2267    fn cf_deduplication_keeps_lower_rank_when_second_is_higher() {
2268        let mut state = DiscourseState::new();
2269        state.begin_render();
2270        state.mention_entity_ranked("Foo", "class", 0);
2271        state.mention_entity_ranked("Foo", "class", 2);
2272        let cf = state.cf();
2273        assert_eq!(cf.len(), 1);
2274        assert_eq!(cf[0].rank, 0);
2275    }
2276
2277    #[test]
2278    fn cf_sorts_by_rank_ascending() {
2279        let mut state = DiscourseState::new();
2280        state.begin_render();
2281        state.mention_entity_ranked("Obj", "class", 1);
2282        state.mention_entity_ranked("Subj", "class", 0);
2283        state.mention_entity_ranked("Oblique", "class", 2);
2284        let cf = state.cf();
2285        assert_eq!(cf[0].name, "Subj");
2286        assert_eq!(cf[1].name, "Obj");
2287        assert_eq!(cf[2].name, "Oblique");
2288    }
2289
2290    #[test]
2291    fn cp_is_first_cf_entry() {
2292        let mut state = DiscourseState::new();
2293        state.begin_render();
2294        state.mention_entity_ranked("Subj", "class", 0);
2295        state.mention_entity_ranked("Obj", "class", 1);
2296        assert_eq!(state.cf()[0].name, "Subj");
2297    }
2298
2299    #[test]
2300    fn cf_cleared_by_begin_render() {
2301        let mut state = DiscourseState::new();
2302        state.begin_render();
2303        state.mention_entity_ranked("Foo", "class", 0);
2304        assert_eq!(state.cf().len(), 1);
2305
2306        state.begin_render();
2307        assert_eq!(
2308            state.cf().len(),
2309            0,
2310            "current_cf must be cleared by begin_render"
2311        );
2312    }
2313
2314    #[test]
2315    fn previous_cf_set_after_advance_cb() {
2316        let mut state = DiscourseState::new();
2317        state.begin_render();
2318        state.mention_entity_ranked("Foo", "class", 0);
2319        state.mention_entity_ranked("Bar", "class", 1);
2320        state.advance_cb();
2321
2322        let prev = state.previous_cf();
2323        assert_eq!(prev.len(), 2);
2324        assert_eq!(prev[0].name, "Foo");
2325        assert_eq!(prev[1].name, "Bar");
2326    }
2327
2328    #[test]
2329    fn mention_entity_delegates_to_rank_zero() {
2330        let mut state = DiscourseState::new();
2331        state.begin_render();
2332        state.mention_entity("Foo", "class");
2333        let cf = state.cf();
2334        assert_eq!(cf.len(), 1);
2335        assert_eq!(cf[0].rank, 0);
2336    }
2337
2338    #[test]
2339    fn classify_transition_all_cases() {
2340        // Continue: cb == prev_cb AND cb == cp.
2341        assert_eq!(
2342            classify_transition(Some("Foo"), Some("Foo"), Some("Foo")),
2343            Transition::Continue
2344        );
2345        // Retain: cb == prev_cb, cb != cp.
2346        assert_eq!(
2347            classify_transition(Some("Foo"), Some("Foo"), Some("Bar")),
2348            Transition::Retain
2349        );
2350        // SmoothShift: cb != prev_cb, cb == cp.
2351        assert_eq!(
2352            classify_transition(Some("Bar"), Some("Foo"), Some("Bar")),
2353            Transition::SmoothShift
2354        );
2355        // RoughShift: cb != prev_cb, cb != cp.
2356        assert_eq!(
2357            classify_transition(Some("Bar"), Some("Foo"), Some("Baz")),
2358            Transition::RoughShift
2359        );
2360        // NoCb: no current cb.
2361        assert_eq!(
2362            classify_transition(None, Some("Foo"), Some("Bar")),
2363            Transition::NoCb
2364        );
2365        // NoCb with all None.
2366        assert_eq!(classify_transition(None, None, None), Transition::NoCb);
2367    }
2368
2369    #[test]
2370    fn reset_clears_cf_and_transition_state() {
2371        let mut state = DiscourseState::new();
2372        state.begin_render();
2373        state.mention_entity_ranked("Foo", "class", 0);
2374        state.advance_cb();
2375        state.reset();
2376
2377        assert_eq!(state.cf().len(), 0);
2378        assert_eq!(state.previous_cf().len(), 0);
2379        assert_eq!(state.last_transition(), Transition::NoCb);
2380    }
2381}
prosaic_core/discourse.rs

prosaic_core/
discourse.rs