1use std::collections::HashMap;
17
18use async_trait::async_trait;
19use unicode_segmentation::UnicodeSegmentation;
20
21use crate::summarizer::backend::{CompactMode, CompactOpts, Style, SummarizerBackend};
22use crate::summarizer::error::BackendError;
23use crate::tokenizer::{self, Tokenizer};
24
25const PAGERANK_DAMPING: f32 = 0.85;
27const PAGERANK_MAX_ITER: usize = 50;
28const PAGERANK_TOL: f32 = 1e-4;
29const SIMILARITY_FLOOR: f32 = 0.1;
30
31#[derive(Debug, Clone)]
33pub(super) struct Sentence {
34 pub span_start: usize,
35 pub text: String,
36}
37
38pub(super) fn split_sentences(content: &str) -> Vec<Sentence> {
39 let mut out = Vec::new();
40 for (offset, s) in content.split_sentence_bound_indices() {
41 let trimmed = s.trim();
42 if trimmed.chars().count() < 3 {
43 continue;
44 }
45 if parse_atx_heading(trimmed).is_some() {
50 continue;
51 }
52 out.push(Sentence {
53 span_start: offset,
54 text: trimmed.to_string(),
55 });
56 }
57 out
58}
59
60#[cfg(test)]
61mod sentence_tests {
62 use super::*;
63
64 #[test]
65 fn split_produces_three_sentences_from_simple_paragraph() {
66 let text = "Hello world. How are you? Fine!";
67 let s = split_sentences(text);
68 assert_eq!(s.len(), 3);
69 assert_eq!(s[0].text, "Hello world.");
70 assert_eq!(s[1].text, "How are you?");
71 assert_eq!(s[2].text, "Fine!");
72 }
73
74 #[test]
75 fn split_skips_short_fragments() {
76 let text = "Hi. This is a longer sentence.";
77 let s = split_sentences(text);
78 assert_eq!(s.len(), 2);
80 }
81
82 #[test]
83 fn split_handles_unicode_punctuation() {
84 let text = "Привет мир. Это тест.";
85 let s = split_sentences(text);
86 assert_eq!(s.len(), 2);
87 }
88
89 #[test]
90 fn split_preserves_byte_offsets() {
91 let text = "First sentence. Second sentence here.";
92 let s = split_sentences(text);
93 assert!(s[0].span_start < s[1].span_start);
94 }
95
96 #[test]
97 fn split_returns_empty_for_empty_input() {
98 assert!(split_sentences("").is_empty());
99 }
100}
101
102pub(super) fn tokenize(s: &str) -> Vec<String> {
104 s.unicode_words().map(str::to_lowercase).collect()
105}
106
107pub(super) fn tfidf_vectors(sentences: &[Sentence]) -> Vec<HashMap<usize, f32>> {
110 if sentences.is_empty() {
111 return Vec::new();
112 }
113
114 let mut df: HashMap<String, usize> = HashMap::new();
116 let mut vocab: HashMap<String, usize> = HashMap::new();
117 let mut vocab_terms: Vec<String> = Vec::new();
118 let tokens_per_sent: Vec<Vec<String>> = sentences.iter().map(|s| tokenize(&s.text)).collect();
119 for terms in &tokens_per_sent {
120 let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
121 for t in terms {
122 if seen.insert(t.as_str()) {
123 *df.entry(t.clone()).or_insert(0) += 1;
124 }
125 if !vocab.contains_key(t) {
126 let id = vocab.len();
127 vocab.insert(t.clone(), id);
128 vocab_terms.push(t.clone());
129 }
130 }
131 }
132 let n = sentences.len() as f32;
133
134 let mut vectors = Vec::with_capacity(sentences.len());
136 for terms in &tokens_per_sent {
137 let mut tf: HashMap<usize, f32> = HashMap::new();
138 for t in terms {
139 let id = vocab[t.as_str()];
140 *tf.entry(id).or_insert(0.0) += 1.0;
141 }
142 let mut tfidf: HashMap<usize, f32> = HashMap::new();
143 for (id, count) in tf {
144 let term = vocab_terms[id].as_str();
145 let dfv = df[term] as f32;
146 let idf = (n / dfv).ln(); if idf > 0.0 {
148 tfidf.insert(id, count * idf);
149 }
150 }
151 let norm: f32 = tfidf.values().map(|v| v * v).sum::<f32>().sqrt();
153 if norm > 0.0 {
154 for v in tfidf.values_mut() {
155 *v /= norm;
156 }
157 }
158 vectors.push(tfidf);
159 }
160 vectors
161}
162
163pub(super) fn cosine(a: &HashMap<usize, f32>, b: &HashMap<usize, f32>) -> f32 {
165 if a.is_empty() || b.is_empty() {
166 return 0.0;
167 }
168 let (small, large) = if a.len() <= b.len() { (a, b) } else { (b, a) };
169 let mut sum = 0.0;
170 for (k, v) in small {
171 if let Some(w) = large.get(k) {
172 sum += v * w;
173 }
174 }
175 sum
176}
177
178#[cfg(test)]
179mod tfidf_tests {
180 use super::*;
181
182 fn sents(strs: &[&str]) -> Vec<Sentence> {
183 strs.iter()
184 .enumerate()
185 .map(|(i, s)| Sentence {
186 span_start: i * 100,
187 text: s.to_string(),
188 })
189 .collect()
190 }
191
192 #[test]
193 fn tokenize_lowercases_words() {
194 let t = tokenize("Hello, World! Don't stop.");
195 assert!(t.contains(&"hello".to_string()));
196 assert!(t.contains(&"world".to_string()));
197 assert!(t.contains(&"don't".to_string()));
198 assert!(t.contains(&"stop".to_string()));
199 }
200
201 #[test]
202 fn tfidf_zeroes_out_terms_appearing_everywhere() {
203 let s = sents(&["the cat sat", "the cat slept", "the cat ran"]);
204 let vecs = tfidf_vectors(&s);
205 for v in &vecs {
207 assert_eq!(v.len(), 1, "sentence kept only the discriminating term");
209 }
210 }
211
212 #[test]
213 fn cosine_is_one_for_identical_sentences() {
214 let s = sents(&["hello world", "hello world", "goodbye moon"]);
218 let v = tfidf_vectors(&s);
219 let c = cosine(&v[0], &v[1]);
220 assert!(
221 (c - 1.0).abs() < 1e-5,
222 "cosine on identical non-degenerate sentences was {c}",
223 );
224 }
225
226 #[test]
227 fn cosine_is_zero_for_disjoint_sentences() {
228 let s = sents(&["alpha beta", "gamma delta", "epsilon zeta"]);
231 let v = tfidf_vectors(&s);
232 assert!(cosine(&v[0], &v[1]).abs() < 1e-5);
234 }
235
236 #[test]
237 fn tfidf_returns_empty_for_empty_input() {
238 assert!(tfidf_vectors(&[]).is_empty());
239 }
240
241 #[test]
242 fn tfidf_handles_all_universal_corpus() {
243 let s = sents(&["the cat", "the cat", "the cat"]);
247 let v = tfidf_vectors(&s);
248 for vec in &v {
249 assert!(vec.is_empty(), "expected empty vector, got {vec:?}");
250 }
251 let pr = pagerank(&v);
252 assert_eq!(pr.len(), 3);
253 for score in &pr {
255 assert!(
256 (score - 1.0 / 3.0).abs() < 0.1,
257 "expected ~0.333 uniform, got {score}",
258 );
259 }
260 }
261}
262
263pub(super) fn pagerank(vectors: &[HashMap<usize, f32>]) -> Vec<f32> {
267 let n = vectors.len();
268 if n == 0 {
269 return Vec::new();
270 }
271 if n == 1 {
272 return vec![1.0];
273 }
274
275 let mut weights = vec![0.0_f32; n * n];
277 let mut row_sums = vec![0.0_f32; n];
278 for i in 0..n {
279 for j in (i + 1)..n {
280 let s = cosine(&vectors[i], &vectors[j]);
281 if s >= SIMILARITY_FLOOR {
282 weights[i * n + j] = s;
283 weights[j * n + i] = s;
284 row_sums[i] += s;
285 row_sums[j] += s;
286 }
287 }
288 }
289
290 let inv_n = 1.0_f32 / n as f32;
291 let mut score = vec![inv_n; n];
292 let teleport = (1.0 - PAGERANK_DAMPING) * inv_n;
293
294 for _ in 0..PAGERANK_MAX_ITER {
295 let mut next = vec![teleport; n];
296 for j in 0..n {
297 if row_sums[j] < f32::EPSILON {
298 let share = PAGERANK_DAMPING * score[j] * inv_n;
300 for slot in next.iter_mut() {
301 *slot += share;
302 }
303 } else {
304 let factor = PAGERANK_DAMPING * score[j] / row_sums[j];
305 for i in 0..n {
306 let w = weights[i * n + j];
307 if w > 0.0 {
308 next[i] += factor * w;
309 }
310 }
311 }
312 }
313 let delta: f32 = score
314 .iter()
315 .zip(next.iter())
316 .map(|(a, b)| (a - b).abs())
317 .sum();
318 score = next;
319 if delta < PAGERANK_TOL {
320 break;
321 }
322 }
323 score
324}
325
326#[cfg(test)]
327mod pagerank_tests {
328 use super::*;
329
330 fn sents(strs: &[&str]) -> Vec<Sentence> {
331 strs.iter()
332 .enumerate()
333 .map(|(i, s)| Sentence {
334 span_start: i * 100,
335 text: s.to_string(),
336 })
337 .collect()
338 }
339
340 #[test]
341 fn empty_returns_empty() {
342 assert!(pagerank(&[]).is_empty());
343 }
344
345 #[test]
346 fn single_sentence_gets_full_mass() {
347 let v = tfidf_vectors(&sents(&["alpha beta gamma"]));
348 let pr = pagerank(&v);
349 assert_eq!(pr.len(), 1);
350 assert!((pr[0] - 1.0).abs() < 1e-5);
351 }
352
353 #[test]
354 fn central_sentence_outscores_peripheral() {
355 let v = tfidf_vectors(&sents(&[
358 "alpha unique_left",
359 "alpha gamma bridge",
360 "gamma unique_right",
361 ]));
362 let pr = pagerank(&v);
363 assert!(
364 pr[1] >= pr[0] && pr[1] >= pr[2],
365 "middle sentence should be highest: {pr:?}",
366 );
367 }
368
369 #[test]
370 fn scores_sum_close_to_one() {
371 let v = tfidf_vectors(&sents(&[
372 "alpha beta gamma",
373 "alpha gamma",
374 "delta epsilon zeta",
375 "alpha epsilon",
376 ]));
377 let pr = pagerank(&v);
378 let total: f32 = pr.iter().sum();
379 assert!((total - 1.0).abs() < 0.05, "sum was {total}");
380 }
381
382 #[test]
383 fn fully_disconnected_graph_converges_to_uniform() {
384 let v = tfidf_vectors(&sents(&[
388 "alpha unique_one",
389 "beta unique_two",
390 "gamma unique_three",
391 "delta unique_four",
392 ]));
393 let pr = pagerank(&v);
394 for score in &pr {
395 assert!(
396 (score - 0.25).abs() < 0.05,
397 "expected ~0.25 uniform, got {score}",
398 );
399 }
400 }
401}
402
403fn select_extractive(
409 sentences: &[Sentence],
410 scores: &[f32],
411 target_tokens: Option<usize>,
412 family: Tokenizer,
413) -> Vec<usize> {
414 if sentences.is_empty() {
415 return Vec::new();
416 }
417 let mut order: Vec<usize> = (0..sentences.len()).collect();
419 order.sort_by(|a, b| {
420 scores[*b]
421 .partial_cmp(&scores[*a])
422 .unwrap_or(std::cmp::Ordering::Equal)
423 });
424
425 let chosen = match target_tokens {
426 None => order,
427 Some(max) => {
428 let mut chosen = Vec::new();
429 let mut cumulative: usize = 0;
430 let mut warned_fallback = false;
431 for idx in order {
432 let count = match tokenizer::count(&sentences[idx].text, family) {
436 Ok(c) => c,
437 Err(e) => {
438 if !warned_fallback {
439 tracing::warn!(
440 target: "rover::summarizer",
441 family = ?family,
442 error = %e,
443 "tokenizer unavailable; falling back to chars/4 heuristic for target_tokens accounting"
444 );
445 warned_fallback = true;
446 }
447 sentences[idx].text.chars().count().div_ceil(4)
448 }
449 };
450 if chosen.is_empty() {
451 chosen.push(idx);
452 cumulative = count;
453 if count > max {
454 tracing::warn!(
455 target: "rover::summarizer",
456 sentence_tokens = count,
457 target_tokens = max,
458 "top-ranked sentence exceeds target_tokens; emitting anyway",
459 );
460 break;
461 }
462 } else if cumulative + count <= max {
463 chosen.push(idx);
464 cumulative += count;
465 } else {
466 }
468 }
469 chosen
470 }
471 };
472
473 let mut by_position = chosen;
474 by_position.sort_by_key(|&i| sentences[i].span_start);
475 by_position
476}
477
478fn format_selected(sentences: &[Sentence], indices: &[usize], style: Style) -> String {
480 match style {
481 Style::Bullet => indices
482 .iter()
483 .map(|&i| format!("- {}", sentences[i].text))
484 .collect::<Vec<_>>()
485 .join("\n"),
486 Style::Prose => indices
487 .iter()
488 .map(|&i| sentences[i].text.clone())
489 .collect::<Vec<_>>()
490 .join(" "),
491 Style::Executive => {
492 if indices.is_empty() {
495 String::new()
496 } else {
497 let head = &sentences[indices[0]].text;
498 if indices.len() == 1 {
499 head.clone()
500 } else {
501 let rest = indices[1..]
502 .iter()
503 .map(|&i| sentences[i].text.clone())
504 .collect::<Vec<_>>()
505 .join(" ");
506 format!("{head}\n\nDetails: {rest}")
507 }
508 }
509 }
510 }
511}
512
513#[cfg(test)]
514mod extractive_mode_tests {
515 use super::*;
516
517 fn sents(strs: &[&str]) -> Vec<Sentence> {
518 strs.iter()
519 .enumerate()
520 .map(|(i, s)| Sentence {
521 span_start: i * 100,
522 text: s.to_string(),
523 })
524 .collect()
525 }
526
527 #[test]
528 fn select_extractive_picks_in_source_order() {
529 let s = sents(&["Low.", "High Importance Sentence Here.", "Mid."]);
530 let v = tfidf_vectors(&s);
531 let pr = pagerank(&v);
532 let _ = pr;
535 let chosen = select_extractive(&s, &[0.1, 0.9, 0.5], None, Tokenizer::O200k);
536 assert_eq!(chosen, vec![0, 1, 2]);
537 }
538
539 #[test]
540 fn select_extractive_caps_to_target_tokens() {
541 let s = sents(&[
542 "first sentence.", "second sentence.", "third sentence.", ]);
546 let chosen = select_extractive(&s, &[0.5, 0.5, 0.5], Some(5), Tokenizer::O200k);
547 assert_eq!(chosen.len(), 1);
550 }
551
552 #[test]
553 fn select_extractive_skips_oversize_and_admits_lower_ranked_that_fits() {
554 let s = sents(&[
566 "first sentence here that is reasonably long.", "okay.", "third sentence here that is reasonably long.", ]);
570 let chosen = select_extractive(&s, &[0.9, 0.1, 0.8], Some(13), Tokenizer::O200k);
571 assert!(chosen.contains(&0), "expected index 0 in {chosen:?}");
572 assert!(chosen.contains(&1), "expected index 1 in {chosen:?}");
573 assert!(
574 !chosen.contains(&2),
575 "expected index 2 excluded in {chosen:?}"
576 );
577 }
578
579 #[test]
580 fn format_bullet_prefixes_dashes() {
581 let s = sents(&["a.", "b.", "c."]);
582 assert_eq!(format_selected(&s, &[0, 2], Style::Bullet), "- a.\n- c.",);
583 }
584
585 #[test]
586 fn format_executive_with_one_sentence_omits_details() {
587 let s = sents(&["only one."]);
588 assert_eq!(format_selected(&s, &[0], Style::Executive), "only one.");
589 }
590}
591
592fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
594 let bytes = line.as_bytes();
595 let mut depth = 0;
596 while depth < bytes.len() && bytes[depth] == b'#' {
597 depth += 1;
598 }
599 if depth == 0 || depth > 6 {
600 return None;
601 }
602 if depth == bytes.len() {
603 return None;
604 }
605 if bytes[depth] != b' ' {
606 return None;
607 }
608 let text = line[depth + 1..].trim();
609 if text.is_empty() {
610 None
611 } else {
612 Some((depth, text))
613 }
614}
615
616#[derive(Debug)]
617struct HeadingSection {
618 depth: usize,
619 heading: String,
620 sentence_indices: Vec<usize>,
622}
623
624fn group_by_headings(content: &str, sentences: &[Sentence]) -> Vec<HeadingSection> {
628 let mut headings: Vec<HeadingSection> = Vec::new();
629 let mut heading_offsets: Vec<usize> = Vec::new();
630 let mut byte_offset = 0;
631 for line in content.split_inclusive('\n') {
632 let line_trimmed = line.trim_end_matches('\n');
633 if let Some((depth, text)) = parse_atx_heading(line_trimmed) {
634 headings.push(HeadingSection {
635 depth,
636 heading: text.to_string(),
637 sentence_indices: Vec::new(),
638 });
639 heading_offsets.push(byte_offset);
640 }
641 byte_offset += line.len();
642 }
643 if headings.is_empty() {
644 return Vec::new();
645 }
646 for (si, sent) in sentences.iter().enumerate() {
647 let mut bucket: Option<usize> = None;
648 for (hi, off) in heading_offsets.iter().enumerate() {
649 if sent.span_start >= *off {
650 bucket = Some(hi);
651 } else {
652 break;
653 }
654 }
655 if let Some(b) = bucket {
656 headings[b].sentence_indices.push(si);
657 }
658 }
659 headings
660}
661
662fn select_headlines(
668 content: &str,
669 sentences: &[Sentence],
670 scores: &[f32],
671 target_tokens: Option<usize>,
672 family: Tokenizer,
673) -> String {
674 let mut groups = group_by_headings(content, sentences);
675 groups.retain(|g| !g.sentence_indices.is_empty());
677
678 if groups.is_empty() {
679 let chosen = select_extractive(
682 sentences,
683 scores,
684 target_tokens.or(Some(usize::MAX)),
685 family,
686 );
687 let trimmed: Vec<usize> = chosen.into_iter().take(3).collect();
688 return format_selected(sentences, &trimmed, Style::Bullet);
689 }
690
691 let deepest = groups.iter().map(|g| g.depth).max().unwrap();
693
694 let mut emitted = Vec::new();
696 let mut cumulative_tokens: usize = 0;
697 let token_budget = target_tokens.unwrap_or(usize::MAX);
698 let mut warned_fallback = false;
699 for g in groups.iter().filter(|g| g.depth == deepest) {
700 let best = g.sentence_indices.iter().copied().max_by(|a, b| {
701 scores[*a]
702 .partial_cmp(&scores[*b])
703 .unwrap_or(std::cmp::Ordering::Equal)
704 });
705 if let Some(idx) = best {
706 let count = match tokenizer::count(&sentences[idx].text, family) {
707 Ok(c) => c,
708 Err(e) => {
709 if !warned_fallback {
710 tracing::warn!(
711 target: "rover::summarizer",
712 family = ?family,
713 error = %e,
714 "tokenizer unavailable; falling back to chars/4 heuristic for headlines budget"
715 );
716 warned_fallback = true;
717 }
718 sentences[idx].text.chars().count().div_ceil(4)
719 }
720 };
721 if !emitted.is_empty() && cumulative_tokens + count > token_budget {
723 break;
724 }
725 let prefix = "#".repeat(g.depth);
726 emitted.push(format!("{prefix} {}\n\n{}", g.heading, sentences[idx].text));
727 cumulative_tokens += count;
728 }
729 }
730 emitted.join("\n\n")
731}
732
733#[cfg(test)]
734mod headlines_tests {
735 use super::*;
736
737 #[test]
738 fn parse_atx_extracts_depth_and_text() {
739 assert_eq!(parse_atx_heading("# Hello"), Some((1, "Hello")));
740 assert_eq!(parse_atx_heading("### Three"), Some((3, "Three")));
741 assert_eq!(parse_atx_heading("####### Too Deep"), None);
742 assert_eq!(parse_atx_heading("#NoSpace"), None);
743 assert_eq!(parse_atx_heading("Not a heading"), None);
744 }
745
746 #[test]
747 fn group_by_headings_buckets_sentences_correctly() {
748 let content =
749 "# A\nfirst sentence here.\nsecond sentence here.\n# B\nthird sentence here.\n";
750 let s = split_sentences(content);
751 let groups = group_by_headings(content, &s);
752 assert_eq!(groups.len(), 2);
753 assert_eq!(groups[0].heading, "A");
754 assert_eq!(groups[1].heading, "B");
755 assert_eq!(groups[0].sentence_indices.len(), 2);
757 assert_eq!(groups[1].sentence_indices.len(), 1);
759 }
760
761 #[test]
762 fn select_headlines_emits_one_per_section() {
763 let content = "# Intro\nThe quick brown fox.\nThe lazy dog.\n## Body\nDetails matter here.\nMore words follow.\n";
764 let s = split_sentences(content);
765 let v = tfidf_vectors(&s);
766 let pr = pagerank(&v);
767 let out = select_headlines(content, &s, &pr, None, Tokenizer::O200k);
768 assert!(out.contains("## Body"));
771 assert!(out.contains("Details") || out.contains("More words"));
772 }
773
774 #[test]
775 fn select_headlines_falls_back_when_no_headings() {
776 let content = "First sentence here. Second sentence here. Third one.";
777 let s = split_sentences(content);
778 let v = tfidf_vectors(&s);
779 let pr = pagerank(&v);
780 let out = select_headlines(content, &s, &pr, None, Tokenizer::O200k);
781 assert!(out.contains("- "));
783 }
784
785 #[test]
786 fn select_headlines_picks_deepest_depth_under_mixed_nesting() {
787 let content = "\
796# A\n\
797## A1\n\
798Sentence under A1 here describing the thing.\n\
799## A2\n\
800Sentence under A2 here describing the other thing.\n\
801# B\n\
802## B1\n\
803Sentence under B1 here describing a third thing.\n";
804 let s = split_sentences(content);
805 let v = tfidf_vectors(&s);
806 let pr = pagerank(&v);
807 let out = select_headlines(content, &s, &pr, None, Tokenizer::O200k);
808 assert!(out.contains("## A1"), "expected ## A1 in {out}");
809 assert!(out.contains("## A2"), "expected ## A2 in {out}");
810 assert!(out.contains("## B1"), "expected ## B1 in {out}");
811 for line in out.lines() {
814 if line.starts_with("# ") && !line.starts_with("## ") {
815 panic!("unexpected H1 in headlines output: {line}");
816 }
817 }
818 }
819}
820
821#[derive(Debug, Clone)]
825pub struct ExtractiveBackend {
826 name: String,
827 tokenizer: Tokenizer,
830}
831
832impl ExtractiveBackend {
833 pub fn new(name: impl Into<String>, tokenizer: Tokenizer) -> Self {
834 Self {
835 name: name.into(),
836 tokenizer,
837 }
838 }
839
840 pub fn run(&self, content: &str, opts: &CompactOpts) -> String {
842 let sentences = split_sentences(content);
843 if sentences.is_empty() {
844 return String::new();
845 }
846 let vectors = tfidf_vectors(&sentences);
847 let scores = pagerank(&vectors);
848
849 match opts.mode {
850 CompactMode::Headlines => select_headlines(
851 content,
852 &sentences,
853 &scores,
854 opts.target_tokens,
855 self.tokenizer,
856 ),
857 CompactMode::Extractive => {
858 let indices =
859 select_extractive(&sentences, &scores, opts.target_tokens, self.tokenizer);
860 format_selected(&sentences, &indices, opts.style)
861 }
862 CompactMode::Abstractive => {
863 let indices =
867 select_extractive(&sentences, &scores, opts.target_tokens, self.tokenizer);
868 format_selected(&sentences, &indices, opts.style)
869 }
870 }
871 }
872}
873
874#[async_trait]
875impl SummarizerBackend for ExtractiveBackend {
876 async fn compact(&self, content: &str, opts: &CompactOpts) -> Result<String, BackendError> {
877 if content.trim().is_empty() {
878 return Err(BackendError::Invalid("empty content".to_string()));
879 }
880 Ok(self.run(content, opts))
881 }
882
883 fn name(&self) -> &str {
884 &self.name
885 }
886}
887
888#[cfg(test)]
889mod trait_tests {
890 use super::*;
891 use crate::summarizer::backend::{CompactMode, PreserveSection, Style};
892
893 fn opts(mode: CompactMode, style: Style) -> CompactOpts {
894 CompactOpts {
895 mode,
896 style,
897 target_tokens: None,
898 focus: None,
899 preserve: vec![],
900 backend_name: "default".to_string(),
901 }
902 }
903
904 #[tokio::test]
905 async fn empty_content_returns_invalid_error() {
906 let be = ExtractiveBackend::new("default", Tokenizer::O200k);
907 let r = be
908 .compact(" ", &opts(CompactMode::Extractive, Style::Prose))
909 .await;
910 assert!(matches!(r, Err(BackendError::Invalid(_))));
911 }
912
913 #[tokio::test]
914 async fn extractive_returns_non_empty_for_real_content() {
915 let be = ExtractiveBackend::new("default", Tokenizer::O200k);
916 let content = "The cat sat on the mat. The dog ran away quickly. The bird flew south.";
917 let out = be
918 .compact(content, &opts(CompactMode::Extractive, Style::Prose))
919 .await
920 .unwrap();
921 assert!(!out.is_empty());
922 }
923
924 #[tokio::test]
925 async fn name_round_trips() {
926 let be = ExtractiveBackend::new("alt-name", Tokenizer::O200k);
927 assert_eq!(be.name(), "alt-name");
928 }
929
930 #[test]
931 fn preserve_unused_for_extractive_compiles() {
932 let _ = PreserveSection::Code;
935 }
936}