flerp 0.5.0

CLI tool that does XYZ
Documentation
use crate::app_structs::{SearchMatch, StructuralAnalysisResults};
use regex::RegexBuilder;
use std::collections::{HashMap, HashSet};

#[derive(Debug, Clone, Copy)]
pub struct SearchOptions {
    pub case_sensitive: bool,
    pub regex_mode: bool,
    pub whole_word: bool,
}

pub fn search(query: &str, contents: &str) -> Vec<String> {
    contents
        .lines()
        .filter(|line| line.contains(query))
        .map(|line| line.to_string())
        .collect()
}

pub fn search_case_insensitive(query: &str, contents: &str) -> Vec<String> {
    let query_lower = query.to_lowercase();
    contents
        .lines()
        .filter(|line| line.to_lowercase().contains(&query_lower))
        .map(|line| line.to_string())
        .collect()
}

pub fn search_with_options(
    query: &str,
    contents: &str,
    options: SearchOptions,
) -> Result<Vec<SearchMatch>, String> {
    if query.is_empty() {
        return Ok(Vec::new());
    }

    if !options.regex_mode && !options.whole_word {
        let matched_lines = if options.case_sensitive {
            search(query, contents)
        } else {
            search_case_insensitive(query, contents)
        };

        let mut remaining_matches = HashMap::new();
        for line in matched_lines {
            *remaining_matches.entry(line).or_insert(0usize) += 1;
        }

        return Ok(contents
            .lines()
            .enumerate()
            .filter_map(|(index, line)| {
                let remaining = remaining_matches.get_mut(line)?;
                if *remaining == 0 {
                    return None;
                }
                *remaining -= 1;

                let match_count = if options.case_sensitive {
                    line.match_indices(query).count()
                } else {
                    count_case_insensitive_matches(line, query)
                };

                Some(SearchMatch {
                    line_number: index + 1,
                    line: line.to_string(),
                    match_count,
                })
            })
            .collect());
    }

    let pattern = if options.regex_mode {
        if options.whole_word {
            format!(r"\b(?:{})\b", query)
        } else {
            query.to_string()
        }
    } else {
        let escaped = regex::escape(query);
        if options.whole_word {
            format!(r"\b{}\b", escaped)
        } else {
            escaped
        }
    };

    let regex = RegexBuilder::new(&pattern)
        .case_insensitive(!options.case_sensitive)
        .build()
        .map_err(|error| error.to_string())?;

    Ok(contents
        .lines()
        .enumerate()
        .filter_map(|(index, line)| {
            let match_count = regex.find_iter(line).count();
            if match_count == 0 {
                None
            } else {
                Some(SearchMatch {
                    line_number: index + 1,
                    line: line.to_string(),
                    match_count,
                })
            }
        })
        .collect())
}

pub fn analyze_structure(contents: &str) -> StructuralAnalysisResults {
    let lines = contents.lines().count();
    let words = contents.split_whitespace().count();
    let characters = contents.chars().count();
    let stanzas = contents.split("\n\n").count();
    let empty_lines = contents.lines().filter(|line| line.trim().is_empty()).count();
    let longest_line = contents.lines().map(|line| line.chars().count()).max().unwrap_or(0);

    let cleaned_words: Vec<String> = contents
        .split_whitespace()
        .map(normalize_word)
        .filter(|word| !word.is_empty())
        .collect();
    let unique_words = cleaned_words.iter().collect::<HashSet<_>>().len();
    let total_word_length: usize = cleaned_words.iter().map(|word| word.chars().count()).sum();
    let average_word_length = if cleaned_words.is_empty() {
        0.0
    } else {
        total_word_length as f64 / cleaned_words.len() as f64
    };

    StructuralAnalysisResults {
        lines,
        words,
        characters,
        stanzas,
        empty_lines,
        unique_words,
        longest_line,
        average_word_length,
    }
}

pub fn extract_keywords(contents: &str, top_n: usize) -> Vec<(String, usize)> {
    let mut word_counts = HashMap::new();
    for word in contents.split_whitespace() {
        let cleaned_word = normalize_word(word);
        if cleaned_word.len() >= 3 {
            *word_counts.entry(cleaned_word).or_insert(0) += 1;
        }
    }

    let mut sorted_keywords: Vec<(String, usize)> = word_counts.into_iter().collect();
    sorted_keywords.sort_by(|a, b| b.1.cmp(&a.1));
    sorted_keywords.truncate(top_n);
    sorted_keywords
}

pub fn extract_repeated_lines(contents: &str, top_n: usize) -> Vec<(String, usize)> {
    let mut counts = HashMap::new();

    for line in contents.lines().map(str::trim).filter(|line| !line.is_empty()) {
        *counts.entry(line.to_string()).or_insert(0) += 1;
    }

    let mut repeated: Vec<(String, usize)> = counts.into_iter().filter(|(_, count)| *count > 1).collect();
    repeated.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    repeated.truncate(top_n);
    repeated
}

fn normalize_word(word: &str) -> String {
    word.to_lowercase()
        .chars()
        .filter(|c| c.is_alphanumeric())
        .collect::<String>()
}

fn count_case_insensitive_matches(line: &str, query: &str) -> usize {
    let lower_line = line.to_lowercase();
    let lower_query = query.to_lowercase();
    let mut count = 0;
    let mut start = 0;

    while let Some(offset) = lower_line[start..].find(&lower_query) {
        count += 1;
        start += offset + lower_query.len();
    }

    count
}