use std::collections::HashMap;
use super::arxiv_db::ArxivDatabase;
use super::types::{
ArxivCitation, BloomLevel, ReflectionQuestion, ReflectionReading, TranscriptInput,
};
/// Builds a [`ReflectionReading`] for a lecture transcript.
///
/// Themes are extracted from `transcript.text`, and the reflection questions
/// are generated from those themes together with the same text. Citations are
/// looked up in the built-in [`ArxivDatabase`]:
///
/// * with `topic_override = Some(topic)`, the lookup is driven by `topic`;
/// * with `None`, the lookup is driven by the extracted themes.
///
/// The override only affects the citation lookup; themes and questions are
/// always derived from the transcript.
pub fn generate_reflection(
    transcript: &TranscriptInput,
    topic_override: Option<&str>,
) -> ReflectionReading {
    let key_themes = extract_themes(&transcript.text);
    let prompts = generate_bloom_questions(&key_themes, &transcript.text);

    let arxiv = ArxivDatabase::builtin();
    let further_reading = match topic_override {
        Some(topic) => find_citations_for_topic(&arxiv, topic),
        None => find_citations_for_themes(&arxiv, &key_themes),
    };

    ReflectionReading {
        themes: key_themes,
        questions: prompts,
        citations: further_reading,
    }
}
/// Renders a [`ReflectionReading`] as a Markdown document.
///
/// The output always contains a top-level title followed by three sections in
/// this order: "Key Themes", "Reflection Questions" and "Further Reading".
/// A section whose list is empty gets a one-line placeholder sentence instead
/// of list items. The document always ends with a blank line.
pub fn render_reflection_markdown(reading: &ReflectionReading) -> String {
    let mut doc = String::from("# Reflection Reading\n\n");

    // Themes: one bullet per theme, closed by a blank line.
    doc += "## Key Themes\n\n";
    if reading.themes.is_empty() {
        doc += "No dominant themes extracted.\n\n";
    } else {
        let bullets: String = reading
            .themes
            .iter()
            .map(|theme| format!("- {theme}\n"))
            .collect();
        doc += &bullets;
        doc.push('\n');
    }

    // Questions: numbered from 1, each tagged with its thinking level and
    // followed by its own blank line.
    doc += "## Reflection Questions\n\n";
    if reading.questions.is_empty() {
        doc += "No reflection questions generated.\n\n";
    } else {
        for (number, q) in (1usize..).zip(reading.questions.iter()) {
            doc += &format!("{}. **[{}]** {}\n\n", number, q.thinking_level, q.question);
        }
    }

    // Citations: "authors (year) — [title](url) — *snippet*", one per line.
    doc += "## Further Reading\n\n";
    if reading.citations.is_empty() {
        doc += "No matching citations found.\n";
    } else {
        for cite in reading.citations.iter() {
            let entry = format!(
                "- {} ({}) — [{}]({}) — *{}*\n",
                cite.authors, cite.year, cite.title, cite.url, cite.abstract_snippet,
            );
            doc += &entry;
        }
    }

    doc.push('\n');
    doc
}
/// Extracts up to five dominant themes from free-form transcript text.
///
/// The text is split on whitespace, each token is stripped of leading and
/// trailing non-alphanumeric characters, tokens shorter than three bytes are
/// dropped, and the rest are lowercased. From the surviving tokens:
///
/// * every adjacent pair of non-stop-words that occurs at least twice becomes
///   a bigram candidate (pairs are adjacent in the *filtered* token list, so
///   they may straddle a dropped short token or a sentence boundary);
/// * the ten most frequent non-stop-words occurring at least three times
///   become unigram candidates.
///
/// Candidates are ranked by descending count and the top five are returned
/// with their first character upper-cased.
///
/// The ranking is fully deterministic: the candidates come out of `HashMap`s,
/// whose iteration order is arbitrary, so ties on count are broken explicitly
/// (bigrams before unigrams, then alphabetically). Without that tie-break the
/// same transcript could yield different themes from one run to the next.
fn extract_themes(text: &str) -> Vec<String> {
    // Minimum token length, measured in bytes (`str::len`).
    const MIN_WORD_LEN: usize = 3;
    const MIN_BIGRAM_COUNT: usize = 2;
    const MIN_UNIGRAM_COUNT: usize = 3;
    const MAX_UNIGRAMS: usize = 10;
    const MAX_THEMES: usize = 5;

    // Normalise once so the unigram and bigram passes share the same tokens.
    let words: Vec<String> = text
        .split_whitespace()
        .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()))
        .filter(|w| w.len() >= MIN_WORD_LEN)
        .map(str::to_lowercase)
        .collect();

    // Lowercasing can change the byte length, hence the second length check.
    let is_content_word = |w: &str| w.len() >= MIN_WORD_LEN && !is_stop_word(w);

    let mut freq: HashMap<&str, usize> = HashMap::new();
    for word in words.iter().map(String::as_str).filter(|&w| is_content_word(w)) {
        *freq.entry(word).or_default() += 1;
    }

    let mut bigram_freq: HashMap<String, usize> = HashMap::new();
    for pair in words.windows(2) {
        let (a, b) = (pair[0].as_str(), pair[1].as_str());
        if is_content_word(a) && is_content_word(b) {
            *bigram_freq.entry(format!("{a} {b}")).or_default() += 1;
        }
    }

    // Highest count first; equal counts fall back to alphabetical order so the
    // result does not depend on hash-map iteration order.
    let by_rank = |a: &(String, usize), b: &(String, usize)| {
        b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))
    };

    let mut themes: Vec<(String, usize)> = bigram_freq
        .into_iter()
        .filter(|(_, count)| *count >= MIN_BIGRAM_COUNT)
        .collect();
    themes.sort_unstable_by(by_rank);

    let mut top_unigrams: Vec<(String, usize)> = freq
        .into_iter()
        .filter(|(_, count)| *count >= MIN_UNIGRAM_COUNT)
        .map(|(word, count)| (word.to_string(), count))
        .collect();
    top_unigrams.sort_unstable_by(by_rank);
    top_unigrams.truncate(MAX_UNIGRAMS);

    // Bigrams were pushed first and `sort_by` is stable, so on equal counts a
    // bigram still outranks a unigram. No de-duplication is needed: map keys
    // are unique, and a bigram (which contains a space) can never equal a
    // unigram (which cannot).
    themes.extend(top_unigrams);
    themes.sort_by(|a, b| b.1.cmp(&a.1));

    themes
        .into_iter()
        .take(MAX_THEMES)
        .map(|(theme, _)| capitalize_theme(&theme))
        .collect()
}
/// Returns `s` with its first character upper-cased and the rest untouched.
///
/// An empty input yields an empty string. The upper-case form of the first
/// character may be more than one character long, in which case all of them
/// are emitted.
fn capitalize_theme(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        out.extend(first.to_uppercase());
        // `as_str` yields whatever the iterator has not consumed yet.
        out.push_str(rest.as_str());
    }
    out
}
/// Generates one reflection question per thinking level for the given themes.
///
/// Returns an empty list when `themes` is empty. Otherwise exactly five
/// questions are produced, in the order Analysis, Synthesis, Evaluation,
/// Application, Creation. Each question is a template with `{theme}` replaced
/// by the theme chosen for that level.
///
/// Every level uses its first template, with two exceptions that look at the
/// lowercased transcript:
///
/// * Analysis switches to its second template when the text contains
///   "compare" or "contrast";
/// * Evaluation switches to its second template when the text contains
///   "limitation" or "trade".
fn generate_bloom_questions(themes: &[String], text: &str) -> Vec<ReflectionQuestion> {
    if themes.is_empty() {
        return Vec::new();
    }

    let transcript = text.to_lowercase();
    // True when the lowercased transcript contains at least one of the needles.
    let mentions_any =
        |needles: [&str; 2]| needles.iter().any(|&needle| transcript.contains(needle));

    let catalog: [(BloomLevel, &[&str]); 5] = [
        (
            BloomLevel::Analysis,
            &[
                "What are the key components of {theme}, and how do they relate to each other?",
                "Compare and contrast the approaches to {theme} discussed in the lecture.",
                "What assumptions underlie the discussion of {theme}?",
            ],
        ),
        (
            BloomLevel::Synthesis,
            &[
                "How would you combine the concepts from {theme} with your existing knowledge to solve a novel problem?",
                "Design a system that integrates {theme} with a complementary technique.",
            ],
        ),
        (
            BloomLevel::Evaluation,
            &[
                "What are the strengths and limitations of the approach to {theme} presented here?",
                "Under what conditions would {theme} fail or underperform?",
            ],
        ),
        (
            BloomLevel::Application,
            &[
                "How would you apply {theme} in a production environment?",
                "Describe a real-world scenario where {theme} would provide significant value.",
            ],
        ),
        (
            BloomLevel::Creation,
            &[
                "Propose an improvement or extension to {theme} that addresses a current limitation.",
                "Design an experiment to validate the effectiveness of {theme} in your domain.",
            ],
        ),
    ];

    catalog
        .iter()
        .map(|&(level, templates)| {
            let preferred = match level {
                BloomLevel::Analysis => usize::from(mentions_any(["compare", "contrast"])),
                BloomLevel::Evaluation => usize::from(mentions_any(["limitation", "trade"])),
                _ => 0,
            };
            let theme = select_theme_for_level(themes, &transcript, &level);
            // Clamp so a level with fewer templates can never index out of range.
            let template = templates[preferred.min(templates.len() - 1)];
            ReflectionQuestion {
                question: template.replace("{theme}", &theme),
                thinking_level: level,
            }
        })
        .collect()
}
/// Picks which theme a question at the given thinking level should talk about.
///
/// Each level has a preferred position in `themes`: Analysis and Application
/// use the first theme, Synthesis and Creation the second, Evaluation the
/// third. When there are fewer themes than the preferred position requires,
/// the position wraps around (`preferred % themes.len()`), so a real theme is
/// always returned for a non-empty slice.
///
/// An empty `themes` slice yields the generic placeholder `"the topic"`
/// instead of panicking on a remainder-by-zero.
///
/// `_text` is currently unused.
fn select_theme_for_level(themes: &[String], _text: &str, level: &BloomLevel) -> String {
    const FALLBACK_THEME: &str = "the topic";

    let preferred: usize = match level {
        BloomLevel::Analysis | BloomLevel::Application => 0,
        BloomLevel::Synthesis | BloomLevel::Creation => 1,
        BloomLevel::Evaluation => 2,
    };

    match themes.len() {
        0 => FALLBACK_THEME.to_string(),
        // `preferred % len` is always < len, so the index is in bounds.
        len => themes[preferred % len].clone(),
    }
}
/// Looks up at most five citations for an explicit topic string.
///
/// The topic lookup is tried first. If it returns fewer than three citations,
/// the topic is split on whitespace and the resulting words are used for a
/// keyword lookup that asks for just enough entries to reach five; keyword
/// hits whose `arxiv_id` is already present are skipped.
fn find_citations_for_topic(db: &ArxivDatabase, topic: &str) -> Vec<ArxivCitation> {
    let mut citations = db.find_by_topic(topic, 5);

    if citations.len() < 3 {
        let words: Vec<&str> = topic.split_whitespace().collect();
        let shortfall = 5 - citations.len();
        for candidate in db.find_by_keywords(&words, shortfall) {
            let is_new = citations.iter().all(|known| known.arxiv_id != candidate.arxiv_id);
            if is_new {
                citations.push(candidate);
            }
        }
    }

    citations.truncate(5);
    citations
}
/// Looks up at most five citations for a list of extracted themes.
///
/// All themes are first passed together to the keyword lookup. If that yields
/// fewer than three citations, each theme is additionally looked up on its own
/// as a topic (two results per theme), skipping any citation whose `arxiv_id`
/// is already present. The per-theme pass stops once five citations have been
/// collected.
fn find_citations_for_themes(db: &ArxivDatabase, themes: &[String]) -> Vec<ArxivCitation> {
    let theme_refs: Vec<&str> = themes.iter().map(String::as_str).collect();
    let mut citations = db.find_by_keywords(&theme_refs, 5);

    if citations.len() < 3 {
        for theme in themes {
            for candidate in db.find_by_topic(theme, 2) {
                let is_new = citations.iter().all(|known| known.arxiv_id != candidate.arxiv_id);
                if is_new {
                    citations.push(candidate);
                }
            }
            // Checked once per theme, after that theme's hits have been merged.
            if citations.len() >= 5 {
                break;
            }
        }
    }

    citations.truncate(5);
    citations
}
/// Returns `true` when `word` is one of the built-in English stop words.
///
/// The comparison is exact and case-sensitive, so callers are expected to
/// pass an already-lowercased word (as `extract_themes` does).
fn is_stop_word(word: &str) -> bool {
    matches!(
        word,
        // Articles, auxiliaries and modals.
        "the" | "a" | "an" | "is" | "are" | "was" | "were" | "be" | "been" | "being"
            | "have" | "has" | "had" | "do" | "does" | "did"
            | "will" | "would" | "could" | "should" | "may" | "might" | "can" | "shall"
            // Prepositions and conjunctions.
            | "to" | "of" | "in" | "for" | "on" | "with" | "at" | "by" | "from" | "as"
            | "or" | "and" | "but" | "if" | "not" | "no" | "so" | "up" | "out"
            // Pronouns and determiners.
            | "it" | "its" | "this" | "that" | "these" | "those"
            | "we" | "you" | "they" | "he" | "she" | "my" | "your" | "our" | "us"
            // Quantifiers and intensifiers.
            | "all" | "each" | "every" | "both" | "few" | "more" | "most" | "other"
            | "some" | "such" | "than" | "too" | "very" | "just" | "also" | "about"
            // Question words.
            | "which" | "what" | "when" | "where" | "how" | "who" | "whom" | "why"
            // Positional and temporal words.
            | "into" | "through" | "during" | "before" | "after" | "above" | "below"
            | "between" | "same" | "different" | "then" | "there" | "here"
            // Miscellaneous high-frequency words.
            | "new" | "old" | "many" | "much" | "own" | "only" | "well"
            | "use" | "used" | "using" | "like" | "one" | "two" | "get" | "make" | "way"
    )
}
// Unit tests for reflection generation, Markdown rendering, theme extraction,
// question templating and citation lookup. Tests that touch
// `ArxivDatabase::builtin()` depend on the contents of that built-in database,
// which is defined outside this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::oracle::coursera::types::TranscriptInput;
// Builds a minimal English transcript with no segments around `text`.
fn make_transcript(text: &str) -> TranscriptInput {
TranscriptInput {
text: text.to_string(),
language: "en".to_string(),
segments: vec![],
source_path: "test.txt".to_string(),
}
}
// With an explicit topic override, citations come from the topic lookup.
// NOTE(review): assumes the built-in database has entries for "mlops".
#[test]
fn test_generate_reflection_with_topic() {
let t = make_transcript(
"Machine learning models need careful deployment. MLOps practices help \
automate the deployment pipeline. Continuous integration ensures quality. \
Testing machine learning models requires specialized approaches.",
);
let reading = generate_reflection(&t, Some("mlops"));
assert!(!reading.citations.is_empty(), "Should find mlops citations");
assert!(!reading.questions.is_empty());
}
// Without an override, themes are extracted from the text and drive the
// questions; "transformer" and "attention" each occur at least three times.
#[test]
fn test_generate_reflection_auto_themes() {
let t = make_transcript(
"Transformer models use attention mechanisms. Attention allows the model to \
focus on relevant parts of the input. The transformer architecture has \
revolutionized natural language processing. Attention is computed as a \
weighted sum of values. Transformer attention enables parallel computation.",
);
let reading = generate_reflection(&t, None);
assert!(!reading.themes.is_empty());
assert!(!reading.questions.is_empty());
}
// An empty transcript yields no themes (and must not panic).
#[test]
fn test_generate_reflection_empty_transcript() {
let t = make_transcript("");
let reading = generate_reflection(&t, None);
assert!(reading.themes.is_empty());
}
// Rendering a fully populated reading includes the title, a theme, the
// bracketed thinking level, and the citation's author and URL.
#[test]
fn test_render_reflection_markdown() {
let reading = ReflectionReading {
themes: vec!["Machine learning".to_string(), "Deployment".to_string()],
questions: vec![ReflectionQuestion {
question: "What are the key challenges?".to_string(),
thinking_level: BloomLevel::Analysis,
}],
citations: vec![ArxivCitation {
arxiv_id: "1706.03762".to_string(),
title: "Attention Is All You Need".to_string(),
authors: "Vaswani et al.".to_string(),
year: 2017,
url: "https://arxiv.org/abs/1706.03762".to_string(),
abstract_snippet: "Proposes the Transformer.".to_string(),
topics: vec!["transformer".to_string()],
}],
};
let md = render_reflection_markdown(&reading);
assert!(md.contains("# Reflection Reading"));
assert!(md.contains("Machine learning"));
assert!(md.contains("[Analysis]"));
assert!(md.contains("Vaswani"));
assert!(md.contains("https://arxiv.org"));
}
// Rendering an empty reading emits the placeholder sentence of every section.
#[test]
fn test_render_reflection_empty() {
let reading = ReflectionReading { themes: vec![], questions: vec![], citations: vec![] };
let md = render_reflection_markdown(&reading);
assert!(md.contains("No dominant themes"));
assert!(md.contains("No reflection questions"));
assert!(md.contains("No matching citations"));
}
// "deep learning" repeats four times, so some theme must mention "deep" or
// "learning".
#[test]
fn test_extract_themes() {
let themes = extract_themes(
"Deep learning models require large datasets for training. Deep learning \
has transformed computer vision and natural language processing. Training \
deep learning models requires significant compute resources. Deep learning \
architectures include transformers and convolutional networks.",
);
assert!(!themes.is_empty());
assert!(
themes.iter().any(|t| t.to_lowercase().contains("deep") || t.to_lowercase().contains("learning")),
"Themes: {:?}",
themes
);
}
// A single theme still produces one question for each of the five levels.
#[test]
fn test_bloom_question_levels() {
let themes = vec!["Machine learning".to_string()];
let questions = generate_bloom_questions(&themes, "Machine learning is important.");
assert_eq!(questions.len(), 5);
let levels: Vec<BloomLevel> = questions.iter().map(|q| q.thinking_level).collect();
assert!(levels.contains(&BloomLevel::Analysis));
assert!(levels.contains(&BloomLevel::Synthesis));
assert!(levels.contains(&BloomLevel::Evaluation));
assert!(levels.contains(&BloomLevel::Application));
assert!(levels.contains(&BloomLevel::Creation));
}
// No themes means no questions, regardless of the text.
#[test]
fn test_bloom_questions_empty_themes() {
let questions = generate_bloom_questions(&[], "Some text");
assert!(questions.is_empty());
}
// Topic lookup never returns more than five citations.
// NOTE(review): non-emptiness assumes the built-in database covers "transformer".
#[test]
fn test_find_citations_for_topic() {
let db = ArxivDatabase::builtin();
let results = find_citations_for_topic(&db, "transformer");
assert!(!results.is_empty());
assert!(results.len() <= 5);
}
// Theme-driven lookup finds something for themes the database is expected
// to know about.
#[test]
fn test_find_citations_for_themes() {
let db = ArxivDatabase::builtin();
let themes = vec!["mlops".to_string(), "deployment".to_string()];
let results = find_citations_for_themes(&db, &themes);
assert!(!results.is_empty());
}
// The text contains "compare", so the Analysis question must use the
// "Compare and contrast" template.
#[test]
fn test_bloom_compare_contrast_template() {
let themes = vec!["Neural networks".to_string()];
let questions =
generate_bloom_questions(&themes, "We compare different neural network architectures.");
let analysis_q = questions
.iter()
.find(|q| q.thinking_level == BloomLevel::Analysis)
.expect("unexpected failure");
assert!(
analysis_q.question.contains("Compare and contrast"),
"Got: {}",
analysis_q.question
);
}
// The text contains "limitation", so the Evaluation question must use the
// "Under what conditions would ... fail" template.
#[test]
fn test_bloom_limitation_template() {
let themes = vec!["Attention".to_string()];
let questions =
generate_bloom_questions(&themes, "A key limitation of attention is memory cost.");
let eval_q = questions
.iter()
.find(|q| q.thinking_level == BloomLevel::Evaluation)
.expect("unexpected failure");
assert!(
eval_q.question.contains("conditions") || eval_q.question.contains("fail"),
"Got: {}",
eval_q.question
);
}
// Capitalizing an empty string returns an empty string.
#[test]
fn test_capitalize_theme_empty() {
assert_eq!(capitalize_theme(""), "");
}
// A multi-word topic that is unlikely to match directly must still respect
// the five-citation cap; an empty result is acceptable here.
#[test]
fn test_find_citations_for_topic_sparse() {
let db = ArxivDatabase::builtin();
let results = find_citations_for_topic(&db, "obscure quantum federated distillation");
assert!(results.len() <= 5);
}
// Mixes a theme that presumably matches nothing with two that should match.
// NOTE(review): whether the per-theme fallback branch actually runs depends
// on how many keyword hits the built-in database returns; only non-emptiness
// is asserted.
#[test]
fn test_find_citations_for_themes_individual_fallback() {
let db = ArxivDatabase::builtin();
let themes =
vec!["xyznonexistent".to_string(), "transformer".to_string(), "attention".to_string()];
let results = find_citations_for_themes(&db, &themes);
assert!(!results.is_empty());
}
}