use std::collections::HashMap;
use std::time::{Duration, Instant};
use tokio::time::sleep;
use thag_profiler::{self, enable_profiling, end, is_profiling_enabled, profile, profiled};
/// A text document flowing through the demo processing pipeline,
/// accumulating analysis results as each stage runs.
struct Document {
    // Caller-assigned identifier (the index within the generated batch).
    id: usize,
    // Raw text to be analyzed.
    content: String,
    // Frequency of each normalized (lowercased, alphabetic-only) word;
    // populated by `count_words`.
    word_count: HashMap<String, usize>,
    // Net positive-minus-negative score divided by total word count;
    // set by `calculate_sentiment` (0.0 until then).
    sentiment_score: f64,
    // Flipped to true once the full processing pipeline has run.
    is_processed: bool,
}
impl Document {
#[profiled]
fn new(id: usize, content: String) -> Self {
std::thread::sleep(Duration::from_millis(10));
let _create_something = vec![
"Hello".to_string(),
"world".to_string(),
"testing".to_string(),
"testing".to_string(),
];
Document {
id,
content,
word_count: HashMap::new(),
sentiment_score: 0.0,
is_processed: false,
}
}
#[profiled(both, mem_detail)]
fn count_words(&mut self) {
std::thread::sleep(Duration::from_millis(20));
let words = self.content.split_whitespace();
for word in words {
let word = word
.to_lowercase()
.chars()
.filter(|c| c.is_alphabetic())
.collect::<String>();
if !word.is_empty() {
*self.word_count.entry(word).or_insert(0) += 1;
}
}
}
#[profiled]
fn calculate_sentiment(&mut self) -> f64 {
std::thread::sleep(Duration::from_millis(30));
let positive_words = ["good", "great", "excellent", "happy", "positive"];
let negative_words = ["bad", "awful", "terrible", "sad", "negative"];
let mut score = 0.0;
for (word, count) in &self.word_count {
if positive_words.contains(&word.as_str()) {
score += 1.0 * *count as f64;
} else if negative_words.contains(&word.as_str()) {
score -= 1.0 * *count as f64;
}
}
let total_words: usize = self.word_count.values().sum();
if total_words > 0 {
score /= total_words as f64;
}
let _create_something = vec![
"Hello".to_string(),
"world".to_string(),
"testing".to_string(),
"testing".to_string(),
];
self.sentiment_score = score;
score
}
}
/// Simulates fetching a document from a remote source: waits out a fake
/// network latency, then synthesizes deterministic content for `id`.
#[profiled]
async fn fetch_document(id: usize) -> Document {
    sleep(Duration::from_millis(40)).await;
    let body = format!(
        "This is document {} with test content. It has good and bad words.",
        id
    );
    Document::new(id, body)
}
/// Runs the full analysis pipeline over one document and marks it processed.
///
/// The middle section is wrapped in an explicit `profile!` span so the
/// simulated async delay shows up as its own entry in the profiler output.
#[profiled]
async fn process_document(mut doc: Document) -> Document {
    // Order matters: sentiment reads the word counts.
    doc.count_words();
    doc.calculate_sentiment();
    profile!(delay, both, async_fn);
    // Small allocation + sleep: gives the profiled span both memory and
    // time activity to record.
    let _dummy: Vec<i32> = (1..=10).collect();
    sleep(Duration::from_millis(15)).await;
    end!(delay);
    doc.is_processed = true;
    doc
}
/// Fetches and processes `count` documents sequentially (each awaited in
/// turn, not concurrently) and returns them in id order.
#[profiled]
async fn generate_and_process_documents(count: usize) -> Vec<Document> {
    let mut processed = Vec::with_capacity(count);
    for doc_id in 0..count {
        let fetched = fetch_document(doc_id).await;
        processed.push(process_document(fetched).await);
    }
    processed
}
#[profiled]
async fn run_batch(count: usize) {
println!(
"is_profiling_enabled()? {}, get_global_profile_type(): {:?}",
thag_profiler::is_profiling_enabled(),
thag_profiler::get_global_profile_type()
);
let start = Instant::now();
let docs = generate_and_process_documents(count).await;
println!(
"Processed {} documents in {:?}",
docs.len(),
start.elapsed()
);
profile!(print_docs, time, mem_summary, async_fn, unbounded);
for doc in &docs {
let _dummy = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
sleep(Duration::from_millis(150)).await;
println!(
"Doc #{}: Word count: {}, Sentiment: {:.2}",
doc.id,
doc.word_count.len(),
doc.sentiment_score
);
}
}
#[tokio::main]
// Profiling is enabled only for debug builds; in release builds cfg_attr
// compiles the instrumentation attribute away entirely.
#[cfg_attr(debug_assertions, enable_profiling(runtime))]
async fn main() {
    println!("Starting simplified document processing example");
    // First batch relies solely on the #[profiled] function-level spans.
    run_batch(3).await;
    // The next two batches are each wrapped in an explicitly named
    // profile!/end! pair so they appear as distinct top-level entries
    // in the profiler output.
    profile!(second_batch, time, mem_summary, async_fn);
    run_batch(2).await;
    end!(second_batch);
    profile!(last_batch, time, mem_summary, async_fn);
    run_batch(1).await;
    end!(last_batch);
    if is_profiling_enabled() {
        println!("Profiling data written to folded files in current directory");
    }
}