use futures::future::join_all;
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufRead, BufReader, Write};
use std::path::Path;
use std::time::{Duration, Instant};
use thag_profiler::{self, enable_profiling, end, profile, profiled};
/// A text document together with the results of analysing it.
///
/// `Debug`, `Clone` and `PartialEq` are derived so documents can be logged,
/// copied and compared without hand-written impls.
#[derive(Debug, Clone, PartialEq)]
struct Document {
    /// Identifier supplied by whoever constructs the document.
    id: usize,
    /// Raw text of the document.
    content: String,
    /// Number of occurrences per word; empty until `count_words` has run.
    word_count: HashMap<String, usize>,
    /// Score written by `calculate_sentiment`; `0.0` until then.
    sentiment_score: f64,
    /// Set to `true` once the document has been through the processing steps.
    is_processed: bool,
}
impl Document {
    /// Creates an unprocessed document with the given `id` and `content`.
    ///
    /// The word-count map starts empty, the sentiment score at `0.0` and
    /// `is_processed` at `false`.
    #[profiled]
    fn new(id: usize, content: String) -> Self {
        // Never read. NOTE(review): presumably a deliberate allocation so the
        // profiler has something to attribute to this constructor — confirm
        // before removing.
        let _create_something = vec![
            "Hello".to_string(),
            "world".to_string(),
            "testing".to_string(),
            "testing".to_string(),
        ];
        Document {
            id,
            content,
            word_count: HashMap::new(),
            sentiment_score: 0.0,
            is_processed: false,
        }
    }

    /// Tallies the words of `content` into `word_count`.
    ///
    /// Each whitespace-separated token is lowercased and stripped of
    /// non-alphabetic characters; tokens that end up empty are skipped.
    /// Counts are added to whatever `word_count` already holds, so calling
    /// this twice doubles every count.
    ///
    /// After counting, the call spins until at least 10 ms have passed since
    /// it started.
    #[profiled]
    fn count_words(&mut self) {
        let start = Instant::now();
        for token in self.content.split_whitespace() {
            // Lowercase first, then filter: lowercasing can itself produce
            // non-alphabetic characters that must be dropped as well.
            let word: String = token
                .to_lowercase()
                .chars()
                .filter(|c| c.is_alphabetic())
                .collect();
            if !word.is_empty() {
                *self.word_count.entry(word).or_insert(0) += 1;
            }
        }
        // Pad the call to a minimum duration of 10 ms.
        while start.elapsed().as_millis() < 10 {
            std::hint::spin_loop();
        }
    }

    /// Computes the sentiment score from `word_count`, stores it in
    /// `sentiment_score` and returns it.
    ///
    /// Every occurrence of a listed positive word adds one and every
    /// occurrence of a listed negative word subtracts one; the sum is divided
    /// by the total number of counted words. With no counted words the score
    /// is `0.0`.
    ///
    /// The call spins until at least 15 ms have passed since its timer
    /// started.
    #[profiled]
    fn calculate_sentiment(&mut self) -> f64 {
        let positive_words = ["good", "great", "excellent", "happy", "positive"];
        let negative_words = ["bad", "awful", "terrible", "sad", "negative"];
        let mut score = 0.0;
        let start = Instant::now();
        for (word, &count) in &self.word_count {
            let word = word.as_str();
            if positive_words.contains(&word) {
                score += count as f64;
            } else if negative_words.contains(&word) {
                score -= count as f64;
            }
        }
        let total_words: usize = self.word_count.values().sum();
        // Guard against dividing by zero for documents without any words.
        if total_words > 0 {
            score /= total_words as f64;
        }
        // Pad the call to a minimum duration of 15 ms.
        while start.elapsed().as_millis() < 15 {
            std::hint::spin_loop();
        }
        self.sentiment_score = score;
        score
    }

    /// Returns a one-line summary: the document id, up to five of its most
    /// frequent words with their counts, and the sentiment score rounded to
    /// two decimals.
    ///
    /// Words are ordered by descending count, with ties broken
    /// alphabetically. Without the tie-break, equally frequent words would
    /// appear in `HashMap` iteration order, which is arbitrary, and the
    /// summary would differ between runs.
    ///
    /// The ranking step spins until at least 5 ms have passed since the call
    /// started.
    #[profiled]
    fn summarize(&self) -> String {
        let start = Instant::now();
        let mut word_vec: Vec<(&String, &usize)> = self.word_count.iter().collect();
        // Keys are unique, so (count desc, word asc) is a total order and an
        // unstable sort still yields a deterministic result.
        word_vec.sort_unstable_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
        let summary = word_vec
            .iter()
            .take(5)
            .map(|(word, count)| format!("{} ({})", word, count))
            .collect::<Vec<String>>()
            .join(", ");
        // Pad the call to a minimum duration of 5 ms.
        while start.elapsed().as_millis() < 5 {
            std::hint::spin_loop();
        }
        format!(
            "Doc #{}: [{}] - Sentiment: {:.2}",
            self.id, summary, self.sentiment_score
        )
    }
}
/// Keeps the calling thread spinning until at least `duration` has elapsed.
///
/// The thread is never put to sleep; it polls the clock in a tight loop and
/// issues a spin-loop hint between polls.
#[profiled]
pub fn busy_wait(duration: Duration) {
    let started_at = Instant::now();
    loop {
        if started_at.elapsed() >= duration {
            break;
        }
        std::hint::spin_loop();
    }
}
/// Builds the document with the given `id` from generated placeholder text.
///
/// The function first spins for 20 ms via `busy_wait` (presumably standing in
/// for fetch latency — confirm). Its body contains no `.await`, so that wait
/// occupies the thread that polls the returned future.
#[profiled]
async fn fetch_document(id: usize) -> Document {
busy_wait(Duration::from_millis(20));
// Content generation is bracketed by `profile!`/`end!` under the name
// `gen_content`, so keep everything that belongs to it between the two calls.
profile!(gen_content);
// Never read afterwards. NOTE(review): looks like a deliberate allocation for
// the profiler to record — confirm before removing.
let _dummy = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
// The text mentions both positive ("good", "excellent") and negative ("bad",
// "terrible") sentiment words.
let content = format!(
"This is document {} with some random content. \
It contains good words and sometimes bad words. \
The quality varies from excellent to terrible \
depending on the document.",
id
);
end!(gen_content);
Document::new(id, content)
}
#[profiled]
async fn process_document(mut doc: Document) -> Document {
doc.count_words();
doc.calculate_sentiment();
doc.is_processed = true;
doc
}
/// Processes every document of the slice in place, in order: counts its
/// words, computes its sentiment and marks it as processed.
#[profiled]
fn batch_process_documents(documents: &mut [Document]) {
    documents.iter_mut().for_each(|doc| {
        doc.count_words();
        doc.calculate_sentiment();
        doc.is_processed = true;
    });
}
/// Counts how many documents fall into each sentiment bucket.
///
/// Buckets by `sentiment_score`: above `0.5` is `very_positive`, above `0.0`
/// is `positive`, exactly `0.0` is `neutral`, above `-0.5` is `negative`, and
/// everything else is `very_negative`. Buckets without documents are absent
/// from the returned map.
#[profiled]
fn analyze_sentiment_distribution(documents: &[Document]) -> HashMap<String, usize> {
    let mut distribution: HashMap<String, usize> = HashMap::new();
    for doc in documents {
        let score = doc.sentiment_score;
        // The checks run from the most positive bucket downwards; the first
        // one that holds decides the label.
        let bucket = if score > 0.5 {
            "very_positive"
        } else if score > 0.0 {
            "positive"
        } else if score == 0.0 {
            "neutral"
        } else if score > -0.5 {
            "negative"
        } else {
            "very_negative"
        };
        *distribution.entry(bucket.to_string()).or_default() += 1;
    }
    distribution
}
/// Writes one summary line per processed document to the file at `path`.
///
/// The file is created, or truncated if it already exists. Documents whose
/// `is_processed` flag is `false` are skipped.
///
/// # Errors
///
/// Returns any I/O error raised while creating the file, writing a line or
/// flushing the buffered output.
#[profiled]
fn write_reports(documents: &[Document], path: &Path) -> io::Result<()> {
    // Buffer the output so each line does not hit the file with its own
    // write call.
    let mut writer = io::BufWriter::new(File::create(path)?);
    for doc in documents.iter().filter(|doc| doc.is_processed) {
        writeln!(writer, "{}", doc.summarize())?;
    }
    // Flush explicitly: dropping a `BufWriter` would discard a write error.
    writer.flush()
}
/// Reads documents from the text file at `path`.
///
/// Any line starting with `---` acts as a separator. The lines between
/// separators are joined, each followed by `'\n'`, into one document's
/// content; empty sections produce no document. Ids are assigned
/// sequentially from `0` in file order.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be opened or a line cannot be
/// read.
#[allow(dead_code)]
#[profiled]
fn load_documents_from_file(path: &Path) -> io::Result<Vec<Document>> {
    let reader = BufReader::new(File::open(path)?);
    let mut documents: Vec<Document> = Vec::new();
    let mut pending = String::new();

    for line in reader.lines() {
        let line = line?;
        if !line.starts_with("---") {
            pending.push_str(&line);
            pending.push('\n');
            continue;
        }
        // Separator line: close the current section if it has any content.
        if pending.is_empty() {
            continue;
        }
        // The next id always equals the number of documents emitted so far.
        let next_id = documents.len();
        documents.push(Document::new(next_id, std::mem::take(&mut pending)));
    }

    // A trailing section without a closing separator still counts.
    if !pending.is_empty() {
        let next_id = documents.len();
        documents.push(Document::new(next_id, pending));
    }
    Ok(documents)
}
/// Fetches and processes `count` documents with ids `0..count` and returns
/// them in id order.
///
/// One future per id is built first and all of them are then awaited together
/// through `join_all`. Building the futures and awaiting them are bracketed
/// by separate `profile!`/`end!` sections named `task_loop` and `joins`.
#[profiled]
async fn generate_and_process_documents(count: usize) -> Vec<Document> {
    let mut pending = Vec::new();
    profile!(task_loop);
    for id in 0..count {
        // Nothing runs yet: the future only fetches and processes once polled.
        let pipeline = async move { process_document(fetch_document(id).await).await };
        pending.push(pipeline);
    }
    end!(task_loop);
    profile!(joins);
    let processed = join_all(pending).await;
    end!(joins);
    processed
}
/// Entry point of the demo.
///
/// Steps, in order:
/// 1. prints whether the profiling feature is enabled;
/// 2. generates and processes 50 documents through the async pipeline and
///    prints their sentiment distribution;
/// 3. builds 30 further documents and processes them with the synchronous
///    batch function;
/// 4. prints the document and word totals and writes one report file per
///    group into the current working directory.
///
/// # Errors
///
/// Returns the I/O error if either report file cannot be written.
#[tokio::main]
#[enable_profiling(runtime)]
async fn main() -> io::Result<()> {
    println!(
        "PROFILING_FEATURE_ENABLED={}",
        thag_profiler::PROFILING_FEATURE_ENABLED
    );
    let docs = generate_and_process_documents(50).await;
    let sentiment_distribution = analyze_sentiment_distribution(&docs);
    println!("Sentiment distribution: {:?}", sentiment_distribution);
    let doc_data: Vec<String> = (0..30)
        .map(|i| {
            format!(
                "Document {} with various contents that need processing. \
                 Some documents are positive and happy, others are negative \
                 and awful. This helps test our sentiment analysis.",
                i
            )
        })
        .collect();
    // `doc_data` is not needed afterwards, so move each string into its
    // document instead of cloning it.
    let mut sync_docs: Vec<Document> = doc_data
        .into_iter()
        .enumerate()
        .map(|(id, content)| Document::new(id, content))
        .collect();
    batch_process_documents(&mut sync_docs);
    // Total word occurrences across both groups of documents.
    let total_words: usize = docs
        .iter()
        .chain(sync_docs.iter())
        .flat_map(|doc| doc.word_count.values())
        .sum();
    println!(
        "Processed {} documents with {} total words",
        docs.len() + sync_docs.len(),
        total_words
    );
    write_reports(&docs, Path::new("async_docs_report.txt"))?;
    write_reports(&sync_docs, Path::new("sync_docs_report.txt"))?;
    println!("Reports written successfully");
    Ok(())
}