unclaimed 0.1.0

Ahoy! Chart yer course through the high seas of crates.io to discover unclaimed treasure names for yer next Rust venture.
use std::{
    collections::BTreeSet as Set,
    fs::{read_to_string, File},
    io::Write,
    path::Path,
};

use clap::Parser;

// Command-line arguments, parsed with clap's derive API.
//
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// on the struct and its fields into `--help` text, so adding them would change
// the program's output.
#[derive(Parser, Debug)]
struct Args {
    // Path of the file holding the candidate names, one per line.
    // `short, long` exposes it as `-i <path>` and `--input <path>`.
    #[arg(short, long)]
    input: std::path::PathBuf,
}

/// Entry point: prints which candidate names from the input file are not
/// already present in the set of known crate names.
///
/// Steps:
/// 1. Parse the command line (`--input <file>`).
/// 2. Load the known crate names via `load_crate_names`.
/// 3. Read the normalized candidate names from the input file.
/// 4. Print the candidates that are missing from the known set.
///
/// # Panics
/// Panics if the input file cannot be read (see `read_lines`).
fn main() {
    let args = Args::parse();

    let crates = load_crate_names();

    // Collect straight into the set: the lines are moved, not cloned, and the
    // set both deduplicates and sorts them.
    let candidates: Set<String> = read_lines(args.input).into_iter().collect();

    println!("Total crates: {}", crates.len());

    println!("Searching for available names...");

    // `BTreeSet::difference` yields its items in ascending order, so a `Vec`
    // keeps the sorted output without building a second tree.
    let available: Vec<&String> = candidates.difference(&crates).collect();

    println!(
        "{} of {} possible values are unclaimed crates.",
        available.len(),
        candidates.len()
    );
    println!("Available crates: ");
    for name in available {
        println!("{}", name);
    }
}

fn read_lines<P: AsRef<Path>>(filename: P) -> Vec<String> {
    read_to_string(filename)
        .unwrap() // panic on possible file-reading errors
        .lines() // split the string into an iterator of string slices
        .map(String::from) // make each slice into a string
        .filter_map(|s| normalize(&s))
        .collect() // gather them together into a vector
}

/// Turns a free-form name into a crate-name-like string.
///
/// The transformation, applied to ASCII input only:
/// - surrounding whitespace is trimmed,
/// - `.` characters are removed,
/// - each remaining space becomes `-`,
/// - every other character is lower-cased.
///
/// Returns `None` when `s` contains any non-ASCII character, or when nothing
/// is left after the transformation (blank or dots-only input). Returning
/// `None` for an empty result keeps blank lines from being treated as a name.
///
/// No other validation is performed; characters such as `'` pass through.
///
/// NOTE(review): crates.io is believed to treat `-` and `_` as equivalent when
/// checking name uniqueness; this function does not unify them — confirm
/// whether callers need that.
fn normalize(s: &str) -> Option<String> {
    if !s.is_ascii() {
        return None;
    }

    let normalized: String = s
        .trim()
        .chars()
        .filter(|&c| c != '.')
        .map(|c| if c == ' ' { '-' } else { c.to_ascii_lowercase() })
        .collect();

    // An empty string is never a usable name.
    (!normalized.is_empty()).then_some(normalized)
}

/// Returns the set of known crate names, lower-cased.
///
/// If `all-crates.txt` exists in the current directory it is used as a cache
/// and read through `read_lines`. Otherwise the names are extracted from
/// `./db-dump.tar.gz` and written to `all-crates.txt` (one name per line) so
/// the next run can skip the dump.
///
/// # Panics
/// Panics if the dump cannot be loaded, or if the cache file cannot be created
/// or written. Failing on a bad dump is deliberate: ignoring the error would
/// cache an empty list, and later runs would report every name as unclaimed.
fn load_crate_names() -> Set<String> {
    let path = Path::new("all-crates.txt");

    if path.exists() {
        return read_lines(path).into_iter().collect();
    }

    // Load from the db-dump and save it for the next run
    let mut crates = Set::new();
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row.name.to_lowercase());
        })
        .load("./db-dump.tar.gz")
        .expect("unable to load ./db-dump.tar.gz");

    let mut f = File::create(path).expect("unable to create file");
    // Borrow the names instead of cloning each one just to join them.
    let contents = crates
        .iter()
        .map(String::as_str)
        .collect::<Vec<_>>()
        .join("\n");
    f.write_all(contents.as_bytes())
        .expect("Unable to write to file");

    crates
}


#[cfg(test)]
mod tests {
    use super::normalize;

    /// A mixed-case name with a dot and a space comes out lower-cased, with
    /// the dot removed and the space replaced by a hyphen.
    #[test]
    fn test_normalize() {
        let normalized = normalize("Mr. Mime");
        assert_eq!(normalized.as_deref(), Some("mr-mime"));
    }
}