// rust2vec-utils 0.5.2
//
// rust2vec utilities
// Documentation
use std::io::BufRead;

use clap::{App, AppSettings, Arg, ArgMatches};
use rust2vec::similarity::Similarity;
use rust2vec_utils::{read_embeddings_view, EmbeddingFormat};
use stdinout::{Input, OrExit};

/// clap application settings that `parse_args` applies to the `r2v-similar`
/// command-line interface.
static DEFAULT_CLAP_SETTINGS: &[AppSettings] = &[
    AppSettings::DontCollapseArgsInUsage,
    AppSettings::UnifiedHelpMessage,
];

/// Declares the `r2v-similar` command-line interface and returns the matches
/// produced by clap for the current invocation.
///
/// Options: `-f FORMAT` (embedding format) and `-k K` (number of neighbors).
/// Positionals: `EMBEDDINGS` (required, index 1) and `INPUT` (optional, index 2).
fn parse_args() -> ArgMatches<'static> {
    // Each argument is built separately so the application definition below
    // reads as a flat list; registration order is kept as-is.
    let format_opt = Arg::with_name("format")
        .short("f")
        .value_name("FORMAT")
        .help("Embedding format: finalfusion, finalfusion_mmap, word2vec, text, or textdims (default: finalfusion)")
        .takes_value(true);

    let neighbors_opt = Arg::with_name("neighbors")
        .short("k")
        .value_name("K")
        .help("Return K nearest neighbors (default: 10)")
        .takes_value(true);

    let embeddings_arg = Arg::with_name("EMBEDDINGS")
        .help("Embeddings file")
        .index(1)
        .required(true);

    let input_arg = Arg::with_name("INPUT").help("Input words").index(2);

    App::new("r2v-similar")
        .settings(DEFAULT_CLAP_SETTINGS)
        .arg(format_opt)
        .arg(neighbors_opt)
        .arg(embeddings_arg)
        .arg(input_arg)
        .get_matches()
}

/// Run-time configuration derived from the command-line arguments.
struct Config {
    /// Value of the required `EMBEDDINGS` positional argument; passed to
    /// `read_embeddings_view` as the embeddings file to load.
    embeddings_filename: String,
    /// Format given with `-f`; `EmbeddingFormat::FinalFusion` when the option
    /// is absent.
    embedding_format: EmbeddingFormat,
    /// Number of neighbors given with `-k`; 10 when the option is absent.
    k: usize,
}

/// Turns parsed command-line matches into a [`Config`].
///
/// Missing options fall back to their defaults (`FinalFusion` format,
/// `k = 10`). An unparsable `-f` or `-k` value is handed to `or_exit` with
/// status 1 and the corresponding message.
///
/// # Panics
///
/// Panics if the `EMBEDDINGS` argument has no value; `parse_args` declares it
/// as required.
fn config_from_matches<'a>(matches: &ArgMatches<'a>) -> Config {
    let embeddings_filename = matches
        .value_of("EMBEDDINGS")
        .map(str::to_owned)
        .unwrap();

    let embedding_format = match matches.value_of("format") {
        Some(name) => EmbeddingFormat::try_from(name).or_exit("Cannot parse embedding format", 1),
        None => EmbeddingFormat::FinalFusion,
    };

    let k = match matches.value_of("neighbors") {
        Some(raw) => raw.parse::<usize>().or_exit("Cannot parse k", 1),
        None => 10,
    };

    Config {
        embeddings_filename,
        embedding_format,
        k,
    }
}

/// Entry point of `r2v-similar`.
///
/// Loads the embeddings named on the command line, then reads query words
/// line by line from the `INPUT` argument (how a missing `INPUT` is handled is
/// up to `stdinout::Input` — presumably standard input; confirm against the
/// crate docs). For every non-blank line, the `config.k` results of a
/// similarity query are printed to standard output as `word<TAB>similarity`.
///
/// Any failure to read the embeddings, open the input, or read a line is
/// passed to `or_exit` with status 1. A query for which `similarity` yields
/// no result is reported on standard error and skipped.
fn main() {
    let matches = parse_args();
    let config = config_from_matches(&matches);

    let embeddings = read_embeddings_view(&config.embeddings_filename, config.embedding_format)
        .or_exit("Cannot read embeddings", 1);

    let input = Input::from(matches.value_of("INPUT"));
    let reader = input.buf_read().or_exit("Cannot open input for reading", 1);

    for line in reader.lines() {
        let line = line.or_exit("Cannot read line", 1);
        // Borrow the trimmed slice; the line itself is already owned, so a
        // second allocation per input line is unnecessary.
        let word = line.trim();
        if word.is_empty() {
            continue;
        }

        let results = match embeddings.similarity(word, config.k) {
            Some(results) => results,
            None => {
                // Report skipped queries on stderr so that stdout stays
                // machine-readable while the user can still see which inputs
                // produced no output.
                eprintln!("Could not compute similarities for: {}", word);
                continue;
            }
        };

        for similar in results {
            println!("{}\t{}", similar.word, similar.similarity);
        }
    }
}