1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
//! 1.2.9+ — POV / character tracker for the status bar.
//!
//! Walks the open paragraph, extracts every lexicon hit
//! categorised as `Character`, and ranks the cast by
//! mention count (ties broken by first-mention order).
//! The top name surfaces in the status bar as the
//! presumed POV character; up to three additional names
//! trail behind as the supporting cast.
//!
//! Heuristic rationale:
//!
//! * In third-person limited prose, the POV character
//! is almost always the most-frequently-named entity
//! in the scene — the narrator's gaze inherently
//! centers them. First-person POV is a degenerate
//! case (the narrator is `I`, who isn't in the
//! character lexicon) — for those scenes the chip
//! correctly surfaces the *other* prominent character,
//! which is still useful context.
//!
//! * Ties broken by first-mention preserve the
//! "scene-opening character" convention — when two
//! characters appear equally often, the one named
//! first in the paragraph is usually the anchor.
//!
//! `compute_pov_chip_from_hits` operates on already-computed
//! per-row hits (`&[Vec<LexHit>]`) so the caller can reuse
//! the lexicon scan it's already doing for syntax-highlight
//! purposes if it wants to. `compute_pov_chip` is the
//! convenience entry point that runs `Lexicon::row_hits`
//! once per line.
use std::collections::HashMap;
use crate::tui::lexicon::{LexCategory, LexHit, Lexicon};
/// Status-bar summary of the cast of the open paragraph:
/// one presumed POV character plus a short supporting list.
///
/// Produced by `compute_pov_chip` and
/// `compute_pov_chip_from_hits`; both return `None`
/// instead of a chip when no character is found, so `pov`
/// is always a real name taken from the text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PovChip {
/// The presumed POV character — most-mentioned in
/// the open paragraph. Stored in the surface form
/// (original casing) of its first mention.
pub pov: String,
/// Other named characters present, in
/// most-mentioned-first order, capped at 3 entries
/// so the chip stays compact in the status bar.
/// Empty when the POV character is the only one named.
pub supporting: Vec<String>,
}
/// Builds the POV chip for the open paragraph straight from
/// its text.
///
/// Every entry of `lines` is scanned with `lex.row_hits`,
/// and the resulting per-row hits are handed to
/// `compute_pov_chip_from_hits`, which does the ranking.
///
/// Returns `None` when the lexicon is empty, when `lines`
/// is empty, or when the ranking step yields no chip.
///
/// The lexicon scan is re-run on every call; a caller that
/// already holds per-row hits can skip it by calling
/// `compute_pov_chip_from_hits` directly.
pub fn compute_pov_chip(lex: &Lexicon, lines: &[String]) -> Option<PovChip> {
    let nothing_to_scan = lex.is_empty() || lines.is_empty();
    if nothing_to_scan {
        return None;
    }

    // One hit list per line, kept in line order so row
    // indices stay aligned with `lines`.
    let mut hits_per_row: Vec<Vec<LexHit>> = Vec::with_capacity(lines.len());
    for line in lines {
        hits_per_row.push(lex.row_hits(line));
    }

    compute_pov_chip_from_hits(&hits_per_row, lines)
}
/// Same as `compute_pov_chip`, but consumes pre-computed
/// per-row hits. Exposed separately so tests can drive
/// the ranking logic without constructing a Lexicon.
pub fn compute_pov_chip_from_hits(
hits_per_row: &[Vec<LexHit>],
lines: &[String],
) -> Option<PovChip> {
if hits_per_row.is_empty() {
return None;
}
// Track per-name: count + first-mention sequence.
// The sequence is the index of the first hit across
// the whole paragraph (row-major), used as the tie-
// breaker for count.
let mut counts: HashMap<String, usize> = HashMap::new();
let mut first_seen: HashMap<String, usize> = HashMap::new();
// The canonical (display) form: keep the first-
// encountered surface form so the chip shows
// "Anna" not "anna" even if later mentions are
// lowercase.
let mut display: HashMap<String, String> = HashMap::new();
let mut seq: usize = 0;
for (row_idx, row_hits) in hits_per_row.iter().enumerate() {
let line = match lines.get(row_idx) {
Some(l) => l,
None => continue,
};
let chars: Vec<char> = line.chars().collect();
for hit in row_hits {
if !matches!(hit.category, LexCategory::Character) {
continue;
}
let start = hit.col_start.min(chars.len());
let end = hit.col_end.min(chars.len());
if end <= start {
continue;
}
let surface: String = chars[start..end].iter().collect();
let surface_trim = surface.trim();
if surface_trim.is_empty() {
continue;
}
let key = surface_trim.to_lowercase();
*counts.entry(key.clone()).or_insert(0) += 1;
first_seen.entry(key.clone()).or_insert(seq);
display
.entry(key.clone())
.or_insert_with(|| surface_trim.to_string());
seq += 1;
}
}
if counts.is_empty() {
return None;
}
let mut ranked: Vec<(String, usize, usize)> = counts
.iter()
.map(|(k, c)| {
let f = first_seen.get(k).copied().unwrap_or(usize::MAX);
(k.clone(), *c, f)
})
.collect();
// Sort by count desc, ties by first_seen asc.
ranked.sort_by(|a, b| {
b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2))
});
let mut iter = ranked.into_iter();
let (pov_key, _, _) = iter.next()?;
let pov_display = display
.get(&pov_key)
.cloned()
.unwrap_or(pov_key);
let supporting: Vec<String> = iter
.take(3)
.map(|(k, _, _)| display.get(&k).cloned().unwrap_or(k))
.collect();
Some(PovChip {
pov: pov_display,
supporting,
})
}
#[cfg(test)]
mod tests {
    //! Unit tests for `compute_pov_chip_from_hits`. Hits are
    //! built by hand so no `Lexicon` is needed; all column
    //! ranges are half-open char indices into the line.
    use super::*;

    /// Builds a `Character` hit covering `col_start..col_end`.
    fn hit(col_start: usize, col_end: usize) -> LexHit {
        LexHit {
            col_start,
            col_end,
            category: LexCategory::Character,
        }
    }

    /// Builds a `Place` hit, used to check that
    /// non-character categories never reach the chip.
    fn place_hit(col_start: usize, col_end: usize) -> LexHit {
        LexHit {
            col_start,
            col_end,
            category: LexCategory::Place,
        }
    }

    #[test]
    fn no_lines_no_chip() {
        assert!(compute_pov_chip_from_hits(&[], &[]).is_none());
    }

    #[test]
    fn no_character_hits_no_chip() {
        let lines = vec!["Anna walked".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![place_hit(0, 4)]];
        assert!(compute_pov_chip_from_hits(&hits, &lines).is_none());
    }

    #[test]
    fn single_character_wins() {
        let lines = vec!["Anna walked away".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![hit(0, 4)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert!(chip.supporting.is_empty());
    }

    #[test]
    fn most_mentioned_wins() {
        // Bob appears 3x, Anna once → POV = Bob.
        let lines = vec![
            "Anna saw Bob".to_string(),
            "Bob smiled".to_string(),
            "Bob left".to_string(),
        ];
        let hits: Vec<Vec<LexHit>> = vec![
            vec![hit(0, 4), hit(9, 12)], // Anna 0..4, Bob 9..12
            vec![hit(0, 3)],             // Bob 0..3
            vec![hit(0, 3)],             // Bob 0..3
        ];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Bob");
        assert_eq!(chip.supporting, vec!["Anna".to_string()]);
    }

    #[test]
    fn ties_broken_by_first_mention() {
        // Anna and Bob each mentioned once. Anna first
        // → POV = Anna.
        let lines = vec!["Anna saw Bob".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![hit(0, 4), hit(9, 12)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert_eq!(chip.supporting, vec!["Bob".to_string()]);
    }

    #[test]
    fn supporting_cast_capped_at_three() {
        // Five distinct characters, each mentioned once.
        let line = "Anna Bob Carol Dave Eve".to_string();
        // Char positions: Anna 0..4, Bob 5..8, Carol 9..14, Dave 15..19, Eve 20..23
        let hits: Vec<Vec<LexHit>> = vec![vec![
            hit(0, 4),
            hit(5, 8),
            hit(9, 14),
            hit(15, 19),
            hit(20, 23),
        ]];
        let lines = vec![line];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        // POV = Anna (first-mention tiebreak), supporting
        // = Bob, Carol, Dave (Eve drops off the cap).
        assert_eq!(chip.pov, "Anna");
        assert_eq!(
            chip.supporting,
            vec!["Bob".to_string(), "Carol".to_string(), "Dave".to_string()]
        );
    }

    #[test]
    fn supporting_cast_ordered_by_mention_count() {
        // Anna 2x, Carol 2x, Bob 1x. Anna beats Carol on
        // first mention; Carol outranks Bob on count even
        // though Bob is named earlier.
        let lines = vec![
            "Anna Bob Carol".to_string(),
            "Anna Carol".to_string(),
        ];
        let hits: Vec<Vec<LexHit>> = vec![
            vec![hit(0, 4), hit(5, 8), hit(9, 14)], // Anna, Bob, Carol
            vec![hit(0, 4), hit(5, 10)],            // Anna, Carol
        ];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert_eq!(
            chip.supporting,
            vec!["Carol".to_string(), "Bob".to_string()]
        );
    }

    #[test]
    fn case_normalised_for_count_display_preserves_first() {
        // "Anna" then "anna" then "ANNA" — same character,
        // count = 3, display = first surface = "Anna".
        let lines = vec![
            "Anna laughed".to_string(),
            "Then anna sighed".to_string(),
            "ANNA stood up".to_string(),
        ];
        let hits: Vec<Vec<LexHit>> = vec![
            vec![hit(0, 4)],
            vec![hit(5, 9)],
            vec![hit(0, 4)],
        ];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        // Without this, the test would still pass if the
        // three spellings were counted as three characters.
        assert!(chip.supporting.is_empty());
    }

    #[test]
    fn non_character_hits_ignored() {
        // Mix of Place + Character hits — only characters
        // count for POV.
        let lines = vec!["Anna entered Winterfell".to_string()];
        // Anna 0..4 (Character), Winterfell 13..23 (Place)
        let hits: Vec<Vec<LexHit>> =
            vec![vec![hit(0, 4), place_hit(13, 23)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert!(chip.supporting.is_empty());
    }

    #[test]
    fn out_of_range_hits_ignored() {
        // A hit lying wholly past the end of the line
        // clamps to an empty range: no panic, no entry.
        let lines = vec!["Anna".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![hit(0, 4), hit(10, 20)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert!(chip.supporting.is_empty());
    }

    #[test]
    fn whitespace_only_hits_ignored() {
        // 4..7 covers only the three spaces between names.
        let lines = vec!["Anna   Bob".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![hit(0, 4), hit(4, 7)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert!(chip.supporting.is_empty());
    }

    #[test]
    fn hit_rows_without_a_line_skipped() {
        // Two hit rows but only one line: the second row
        // has no text to slice and must not contribute.
        let lines = vec!["Anna left".to_string()];
        let hits: Vec<Vec<LexHit>> = vec![vec![hit(0, 4)], vec![hit(0, 3)]];
        let chip = compute_pov_chip_from_hits(&hits, &lines).unwrap();
        assert_eq!(chip.pov, "Anna");
        assert!(chip.supporting.is_empty());
    }
}