mecrab_word2vec/
lib.rs

1//! mecrab-word2vec: Pure Rust Word2Vec implementation
2//!
3//! Fast, memory-efficient word2vec training optimized for Japanese morphological analysis.
4//!
5//! # Features
6//!
7//! - Skip-gram with negative sampling
8//! - Multi-threaded training with Rayon
9//! - Direct MCV1 format output
10//! - Memory-efficient streaming
11//!
12//! # Example
13//!
14//! ```no_run
15//! use mecrab_word2vec::Word2VecBuilder;
16//!
17//! let model = Word2VecBuilder::new()
18//!     .vector_size(100)
19//!     .window_size(5)
20//!     .negative_samples(5)
21//!     .min_count(10)
22//!     .epochs(3)
23//!     .threads(8)
24//!     .build()?;
25//!
26//! model.train_from_file("corpus.txt")?;
27//! model.save_text("vectors.txt")?;
28//! # Ok::<(), anyhow::Error>(())
29//! ```
30
31mod io;
32mod model;
33mod skipgram;
34mod trainer;
35mod vocab;
36
37pub use model::{Word2Vec, Word2VecBuilder};
38pub use vocab::Vocabulary;
39
40use thiserror::Error;
41
/// Error type for this crate; it is the error half of the crate-level `Result` alias.
///
/// The `Display` and `std::error::Error` implementations are generated by the
/// `thiserror` derive from the `#[error("...")]` attribute on each variant.
42#[derive(Error, Debug)]
43pub enum Word2VecError {
    /// An underlying `std::io::Error`, displayed as `IO error: <inner error>`.
    ///
    /// `#[from]` makes the derive generate `From<std::io::Error>`, so `?` on an
    /// I/O result converts into this variant automatically.
44    #[error("IO error: {0}")]
45    Io(#[from] std::io::Error),
46
    /// A parameter was rejected; displayed as `Invalid parameter: <message>`.
    ///
    /// NOTE(review): presumably produced by builder/configuration validation;
    /// the raising sites are not in this file — confirm.
47    #[error("Invalid parameter: {0}")]
48    InvalidParameter(String),
49
    /// A failure carrying a free-form message; displayed as `Training error: <message>`.
    ///
    /// NOTE(review): presumably raised from the training code path — confirm.
50    #[error("Training error: {0}")]
51    Training(String),
52
    /// A failure carrying a free-form message; displayed as `Vocabulary error: <message>`.
    ///
    /// NOTE(review): presumably raised while building or querying the vocabulary — confirm.
53    #[error("Vocabulary error: {0}")]
54    Vocabulary(String),
55}
56
/// Convenience alias: `std::result::Result` with the error type fixed to `Word2VecError`.
///
/// Inside this module the bare name `Result` refers to this alias; the standard
/// two-parameter type is still reachable as `std::result::Result`.
57pub type Result<T> = std::result::Result<T, Word2VecError>;