use colored::*;
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::fs;
pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
let contents = if config.file_name.ends_with(".pdf") {
println!("{}", "Reading pdf file...".bright_blue().bold());
pdf_extract::extract_text(&config.file_name)?
} else {
fs::read_to_string(config.file_name)?
};
let structural_analysis_results = analyze_structure(&contents);
println!("{}", "Structural Analysis:".cyan().bold());
println!(
" Lines: {}",
structural_analysis_results.lines.to_string().yellow()
);
println!(
" Words: {}",
structural_analysis_results.words.to_string().yellow()
);
println!(
" Characters: {}",
structural_analysis_results.characters.to_string().yellow()
); println!(
" Stanzas: {}",
structural_analysis_results.stanzas.to_string().yellow()
);
let keywords = extract_keywords(&contents, 5); println!("\n{}", "Keywords (Top 5):".cyan().bold());
for (keyword, count) in keywords {
println!(" {}: {}", keyword.green().bold(), count.to_string().blue());
}
println!("\n{}", "Search Results:".cyan().bold());
let search_results = if config.case_sensitive {
search(&config.query, &contents)
} else {
search_case_insensitive(&config.query, &contents)
};
if search_results.is_empty() {
println!("{}", " No matches found.".italic());
} else {
for line_content in search_results {
let query_to_highlight = &config.query;
if config.case_sensitive {
let mut last_end = 0;
for (start, part) in line_content.match_indices(query_to_highlight) {
print!("{}", &line_content[last_end..start]);
print!("{}", part.magenta().bold());
last_end = start + part.len();
}
println!("{}", &line_content[last_end..]);
} else {
let lower_query = query_to_highlight.to_lowercase();
let mut last_end = 0;
let mut current_search_pos = 0;
let lower_line_content = line_content.to_lowercase();
while let Some(start_in_lower) =
lower_line_content[current_search_pos..].find(&lower_query)
{
let actual_start = current_search_pos + start_in_lower;
let match_len = lower_query.len();
let actual_end = actual_start + match_len;
print!("{}", &line_content[last_end..actual_start]);
print!("{}", (&line_content[actual_start..actual_end]).red().bold());
last_end = actual_end;
current_search_pos = actual_end;
if current_search_pos >= line_content.len() {
break;
}
}
println!("{}", &line_content[last_end..]);
}
}
}
Ok(())
}
pub struct Config {
pub query: String,
pub file_name: String,
pub case_sensitive: bool,
}
impl Config {
pub fn new(args: &[String]) -> Result<Config, &str> {
if args.len() < 3 {
return Err("Looks like you passed some wonky doohickeys");
}
let query = args[1].clone();
let file_name = args[2].clone();
let case_sensitive = env::var("CASE_INSENSITIVE").is_err();
Ok(Config {
query,
file_name,
case_sensitive,
})
}
}
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
let mut res = Vec::new();
for line in contents.lines() {
if line.contains(query) {
res.push(line);
}
}
res
}
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
let query_lower = query.to_lowercase(); let mut res = Vec::new();
for line in contents.lines() {
if line.to_lowercase().contains(&query_lower) {
res.push(line);
}
}
res
}
pub struct StructuralAnalysisResults {
pub lines: usize,
pub words: usize,
pub characters: usize, pub stanzas: usize,
}
pub fn analyze_structure(contents: &str) -> StructuralAnalysisResults {
let lines = contents.lines().count();
let words = contents.split_whitespace().count();
let characters = contents.chars().count(); let stanzas = contents.split("\\n\\n").count();
StructuralAnalysisResults {
lines,
words,
characters,
stanzas,
}
}
pub fn extract_keywords(contents: &str, top_n: usize) -> Vec<(String, usize)> {
let mut word_counts = HashMap::new();
for word in contents.split_whitespace() {
let cleaned_word = word
.to_lowercase()
.chars()
.filter(|c| c.is_alphanumeric())
.collect::<String>();
if !cleaned_word.is_empty() {
*word_counts.entry(cleaned_word).or_insert(0) += 1;
}
}
let mut sorted_keywords: Vec<(String, usize)> = word_counts.into_iter().collect();
sorted_keywords.sort_by(|a, b| b.1.cmp(&a.1));
sorted_keywords.truncate(top_n);
sorted_keywords
}