1use std::collections::HashMap;
7
8use super::arxiv_db::ArxivDatabase;
9use super::types::{
10 ArxivCitation, BloomLevel, ReflectionQuestion, ReflectionReading, TranscriptInput,
11};
12
13pub fn generate_reflection(
18 transcript: &TranscriptInput,
19 topic_override: Option<&str>,
20) -> ReflectionReading {
21 let themes = extract_themes(&transcript.text);
22 let questions = generate_bloom_questions(&themes, &transcript.text);
23
24 let db = ArxivDatabase::builtin();
25 let citations = if let Some(topic) = topic_override {
26 find_citations_for_topic(&db, topic)
27 } else {
28 find_citations_for_themes(&db, &themes)
29 };
30
31 ReflectionReading { themes, questions, citations }
32}
33
34pub fn render_reflection_markdown(reading: &ReflectionReading) -> String {
36 let mut md = String::new();
37 md.push_str("# Reflection Reading\n\n");
38
39 md.push_str("## Key Themes\n\n");
41 if reading.themes.is_empty() {
42 md.push_str("No dominant themes extracted.\n\n");
43 } else {
44 for theme in &reading.themes {
45 md.push_str(&format!("- {theme}\n"));
46 }
47 md.push('\n');
48 }
49
50 md.push_str("## Reflection Questions\n\n");
52 if reading.questions.is_empty() {
53 md.push_str("No reflection questions generated.\n\n");
54 } else {
55 for (i, q) in reading.questions.iter().enumerate() {
56 md.push_str(&format!("{}. **[{}]** {}\n\n", i + 1, q.thinking_level, q.question));
57 }
58 }
59
60 md.push_str("## Further Reading\n\n");
62 if reading.citations.is_empty() {
63 md.push_str("No matching citations found.\n");
64 } else {
65 for cite in &reading.citations {
66 md.push_str(&format!(
67 "- {} ({}) — [{}]({}) — *{}*\n",
68 cite.authors, cite.year, cite.title, cite.url, cite.abstract_snippet,
69 ));
70 }
71 }
72 md.push('\n');
73
74 md
75}
76
77fn extract_themes(text: &str) -> Vec<String> {
82 let words: Vec<&str> = text
83 .split_whitespace()
84 .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()))
85 .filter(|w| w.len() >= 3)
86 .collect();
87
88 let mut freq: HashMap<String, usize> = HashMap::new();
90 for word in &words {
91 let lower = word.to_lowercase();
92 if !is_stop_word(&lower) && lower.len() >= 3 {
93 *freq.entry(lower).or_default() += 1;
94 }
95 }
96
97 let mut bigram_freq: HashMap<String, usize> = HashMap::new();
99 for pair in words.windows(2) {
100 let a = pair[0].to_lowercase();
101 let b = pair[1].to_lowercase();
102 if !is_stop_word(&a) && !is_stop_word(&b) && a.len() >= 3 && b.len() >= 3 {
103 let bigram = format!("{a} {b}");
104 *bigram_freq.entry(bigram).or_default() += 1;
105 }
106 }
107
108 let mut themes: Vec<(String, usize)> =
110 bigram_freq.into_iter().filter(|(_, count)| *count >= 2).collect();
111
112 let top_unigrams: Vec<(String, usize)> = {
113 let mut v: Vec<_> = freq.into_iter().filter(|(_, c)| *c >= 3).collect();
114 v.sort_by(|a, b| b.1.cmp(&a.1));
115 v.into_iter().take(10).collect()
116 };
117
118 themes.extend(top_unigrams);
119 themes.sort_by(|a, b| b.1.cmp(&a.1));
120 themes.dedup_by(|a, b| a.0 == b.0);
121
122 themes.into_iter().take(5).map(|(t, _)| capitalize_theme(&t)).collect()
123}
124
/// Returns `s` with its first character upper-cased and the remainder left
/// untouched. An empty input yields an empty string.
fn capitalize_theme(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        // Upper-casing a single char may expand to several chars.
        out.extend(first.to_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
132
133fn generate_bloom_questions(themes: &[String], text: &str) -> Vec<ReflectionQuestion> {
134 if themes.is_empty() {
135 return Vec::new();
136 }
137
138 let text_lower = text.to_lowercase();
139
140 let bloom_templates: Vec<(BloomLevel, &[&str])> = vec![
141 (
142 BloomLevel::Analysis,
143 &[
144 "What are the key components of {theme}, and how do they relate to each other?",
145 "Compare and contrast the approaches to {theme} discussed in the lecture.",
146 "What assumptions underlie the discussion of {theme}?",
147 ],
148 ),
149 (
150 BloomLevel::Synthesis,
151 &[
152 "How would you combine the concepts from {theme} with your existing knowledge to solve a novel problem?",
153 "Design a system that integrates {theme} with a complementary technique.",
154 ],
155 ),
156 (
157 BloomLevel::Evaluation,
158 &[
159 "What are the strengths and limitations of the approach to {theme} presented here?",
160 "Under what conditions would {theme} fail or underperform?",
161 ],
162 ),
163 (
164 BloomLevel::Application,
165 &[
166 "How would you apply {theme} in a production environment?",
167 "Describe a real-world scenario where {theme} would provide significant value.",
168 ],
169 ),
170 (
171 BloomLevel::Creation,
172 &[
173 "Propose an improvement or extension to {theme} that addresses a current limitation.",
174 "Design an experiment to validate the effectiveness of {theme} in your domain.",
175 ],
176 ),
177 ];
178
179 let mut questions = Vec::new();
180
181 for (level, templates) in &bloom_templates {
182 let theme = select_theme_for_level(themes, &text_lower, level);
184 let template_idx = match level {
185 BloomLevel::Analysis => {
186 usize::from(text_lower.contains("compare") || text_lower.contains("contrast"))
187 }
188 BloomLevel::Evaluation => {
189 usize::from(text_lower.contains("limitation") || text_lower.contains("trade"))
190 }
191 _ => 0,
192 };
193
194 let template = templates[template_idx.min(templates.len() - 1)];
195 let question = template.replace("{theme}", &theme);
196
197 questions.push(ReflectionQuestion { question, thinking_level: *level });
198 }
199
200 questions
201}
202
203fn select_theme_for_level(themes: &[String], _text: &str, _level: &BloomLevel) -> String {
204 let idx = match _level {
206 BloomLevel::Analysis => 0,
207 BloomLevel::Synthesis => themes.len().min(1),
208 BloomLevel::Evaluation => themes.len().min(2) % themes.len(),
209 BloomLevel::Application => 0,
210 BloomLevel::Creation => themes.len().min(1) % themes.len(),
211 };
212 themes.get(idx).cloned().unwrap_or_else(|| "the topic".to_string())
213}
214
215fn find_citations_for_topic(db: &ArxivDatabase, topic: &str) -> Vec<ArxivCitation> {
216 let mut results = db.find_by_topic(topic, 5);
217 if results.len() < 3 {
218 let keywords: Vec<&str> = topic.split_whitespace().collect();
220 let additional = db.find_by_keywords(&keywords, 5 - results.len());
221 for cite in additional {
222 if !results.iter().any(|r| r.arxiv_id == cite.arxiv_id) {
223 results.push(cite);
224 }
225 }
226 }
227 results.truncate(5);
228 results
229}
230
231fn find_citations_for_themes(db: &ArxivDatabase, themes: &[String]) -> Vec<ArxivCitation> {
232 let keywords: Vec<&str> = themes.iter().map(|t| t.as_str()).collect();
233 let mut results = db.find_by_keywords(&keywords, 5);
234
235 if results.len() < 3 {
237 for theme in themes {
238 let additional = db.find_by_topic(theme, 2);
239 for cite in additional {
240 if !results.iter().any(|r| r.arxiv_id == cite.arxiv_id) {
241 results.push(cite);
242 }
243 }
244 if results.len() >= 5 {
245 break;
246 }
247 }
248 }
249
250 results.truncate(5);
251 results
252}
253
/// Reports whether `word` is one of the fixed stop words that are excluded
/// from theme extraction.
///
/// The comparison is exact and case-sensitive; every entry is lowercase, so
/// callers are expected to pass an already-lowercased word.
fn is_stop_word(word: &str) -> bool {
    matches!(
        word,
        "the" | "a" | "an" | "is" | "are" | "was" | "were" | "be" | "been" | "being"
            | "have" | "has" | "had" | "do" | "does" | "did"
            | "will" | "would" | "could" | "should" | "may" | "might" | "can" | "shall"
            | "to" | "of" | "in" | "for" | "on" | "with" | "at" | "by" | "from" | "as"
            | "or" | "and" | "but" | "if" | "not" | "no" | "so" | "up" | "out"
            | "it" | "its" | "this" | "that" | "these" | "those"
            | "we" | "you" | "they" | "he" | "she" | "my" | "your" | "our" | "us"
            | "all" | "each" | "every" | "both" | "few" | "more" | "most" | "other" | "some"
            | "such" | "than" | "too" | "very" | "just" | "also" | "about"
            | "which" | "what" | "when" | "where" | "how" | "who" | "whom" | "why"
            | "into" | "through" | "during" | "before" | "after" | "above" | "below" | "between"
            | "same" | "different" | "then" | "there" | "here"
            | "new" | "old" | "many" | "much" | "own" | "only" | "well"
            | "use" | "used" | "using" | "like" | "one" | "two" | "get" | "make" | "way"
    )
}
370
/// Unit tests for reflection generation, theme extraction, question
/// templating, citation lookup and Markdown rendering.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::oracle::coursera::types::TranscriptInput;

    /// Builds a minimal English transcript with no segments around `text`.
    fn make_transcript(text: &str) -> TranscriptInput {
        TranscriptInput {
            text: text.to_string(),
            language: "en".to_string(),
            segments: vec![],
            source_path: "test.txt".to_string(),
        }
    }

    #[test]
    fn test_generate_reflection_with_topic() {
        let t = make_transcript(
            "Machine learning models need careful deployment. MLOps practices help \
             automate the deployment pipeline. Continuous integration ensures quality. \
             Testing machine learning models requires specialized approaches.",
        );
        let reading = generate_reflection(&t, Some("mlops"));
        assert!(!reading.citations.is_empty(), "Should find mlops citations");
        assert!(reading.citations.len() <= 5, "Citations are capped at five");
        assert!(!reading.questions.is_empty());
    }

    #[test]
    fn test_generate_reflection_auto_themes() {
        let t = make_transcript(
            "Transformer models use attention mechanisms. Attention allows the model to \
             focus on relevant parts of the input. The transformer architecture has \
             revolutionized natural language processing. Attention is computed as a \
             weighted sum of values. Transformer attention enables parallel computation.",
        );
        let reading = generate_reflection(&t, None);
        assert!(!reading.themes.is_empty());
        assert!(!reading.questions.is_empty());
    }

    #[test]
    fn test_generate_reflection_empty_transcript() {
        let t = make_transcript("");
        let reading = generate_reflection(&t, None);
        assert!(reading.themes.is_empty());
        // Without themes there is nothing to ask about.
        assert!(reading.questions.is_empty());
    }

    #[test]
    fn test_render_reflection_markdown() {
        let reading = ReflectionReading {
            themes: vec!["Machine learning".to_string(), "Deployment".to_string()],
            questions: vec![ReflectionQuestion {
                question: "What are the key challenges?".to_string(),
                thinking_level: BloomLevel::Analysis,
            }],
            citations: vec![ArxivCitation {
                arxiv_id: "1706.03762".to_string(),
                title: "Attention Is All You Need".to_string(),
                authors: "Vaswani et al.".to_string(),
                year: 2017,
                url: "https://arxiv.org/abs/1706.03762".to_string(),
                abstract_snippet: "Proposes the Transformer.".to_string(),
                topics: vec!["transformer".to_string()],
            }],
        };
        let md = render_reflection_markdown(&reading);
        assert!(md.contains("# Reflection Reading"));
        assert!(md.contains("Machine learning"));
        assert!(md.contains("[Analysis]"));
        assert!(md.contains("Vaswani"));
        assert!(md.contains("https://arxiv.org"));
    }

    #[test]
    fn test_render_reflection_empty() {
        let reading = ReflectionReading { themes: vec![], questions: vec![], citations: vec![] };
        let md = render_reflection_markdown(&reading);
        assert!(md.contains("No dominant themes"));
        assert!(md.contains("No reflection questions"));
        assert!(md.contains("No matching citations"));
    }

    #[test]
    fn test_extract_themes() {
        let themes = extract_themes(
            "Deep learning models require large datasets for training. Deep learning \
             has transformed computer vision and natural language processing. Training \
             deep learning models requires significant compute resources. Deep learning \
             architectures include transformers and convolutional networks.",
        );
        assert!(!themes.is_empty());
        assert!(themes.len() <= 5, "At most five themes are returned: {:?}", themes);
        assert!(
            themes
                .iter()
                .any(|t| t.to_lowercase().contains("deep") || t.to_lowercase().contains("learning")),
            "Themes: {:?}",
            themes
        );
    }

    #[test]
    fn test_bloom_question_levels() {
        let themes = vec!["Machine learning".to_string()];
        let questions = generate_bloom_questions(&themes, "Machine learning is important.");
        assert_eq!(questions.len(), 5);

        let levels: Vec<BloomLevel> = questions.iter().map(|q| q.thinking_level).collect();
        assert!(levels.contains(&BloomLevel::Analysis));
        assert!(levels.contains(&BloomLevel::Synthesis));
        assert!(levels.contains(&BloomLevel::Evaluation));
        assert!(levels.contains(&BloomLevel::Application));
        assert!(levels.contains(&BloomLevel::Creation));
    }

    #[test]
    fn test_bloom_questions_empty_themes() {
        let questions = generate_bloom_questions(&[], "Some text");
        assert!(questions.is_empty());
    }

    #[test]
    fn test_find_citations_for_topic() {
        let db = ArxivDatabase::builtin();
        let results = find_citations_for_topic(&db, "transformer");
        assert!(!results.is_empty());
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_find_citations_for_themes() {
        let db = ArxivDatabase::builtin();
        let themes = vec!["mlops".to_string(), "deployment".to_string()];
        let results = find_citations_for_themes(&db, &themes);
        assert!(!results.is_empty());
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_bloom_compare_contrast_template() {
        let themes = vec!["Neural networks".to_string()];
        // "compare" in the text selects the second Analysis template.
        let questions =
            generate_bloom_questions(&themes, "We compare different neural network architectures.");
        let analysis_q = questions
            .iter()
            .find(|q| q.thinking_level == BloomLevel::Analysis)
            .expect("generate_bloom_questions should emit an Analysis question");
        assert!(
            analysis_q.question.contains("Compare and contrast"),
            "Got: {}",
            analysis_q.question
        );
    }

    #[test]
    fn test_bloom_limitation_template() {
        let themes = vec!["Attention".to_string()];
        // "limitation" in the text selects the second Evaluation template.
        let questions =
            generate_bloom_questions(&themes, "A key limitation of attention is memory cost.");
        let eval_q = questions
            .iter()
            .find(|q| q.thinking_level == BloomLevel::Evaluation)
            .expect("generate_bloom_questions should emit an Evaluation question");
        assert!(
            eval_q.question.contains("conditions") || eval_q.question.contains("fail"),
            "Got: {}",
            eval_q.question
        );
    }

    #[test]
    fn test_capitalize_theme_empty() {
        assert_eq!(capitalize_theme(""), "");
    }

    #[test]
    fn test_capitalize_theme_first_letter_only() {
        assert_eq!(capitalize_theme("deep learning"), "Deep learning");
        assert_eq!(capitalize_theme("Already"), "Already");
    }

    #[test]
    fn test_find_citations_for_topic_sparse() {
        let db = ArxivDatabase::builtin();
        // A multi-word topic that is unlikely to match directly; only the cap
        // on the result size is guaranteed here.
        let results = find_citations_for_topic(&db, "obscure quantum federated distillation");
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_find_citations_for_themes_individual_fallback() {
        let db = ArxivDatabase::builtin();
        let themes =
            vec!["xyznonexistent".to_string(), "transformer".to_string(), "attention".to_string()];
        let results = find_citations_for_themes(&db, &themes);
        assert!(!results.is_empty());
        assert!(results.len() <= 5);
    }
}