use embedding::*;
fn main() -> Result<(), String> {
let text = if let Ok(data) = std::fs::read_to_string("examples/data.txt") {
data
} else {
"the quick brown fox jumps over the lazy dog. the fox is quick and the dog is lazy.".to_string()
};
let sentences = load_text_data(&text);
println!("Loaded {} sentences", sentences.len());
let (vocab, reverse_vocab) = build_vocab(&sentences);
println!("Built vocabulary with {} words", vocab.len());
println!("Sample vocabulary:");
for (i, word) in reverse_vocab.iter().enumerate().take(10) {
println!(" {}: {}", i, word);
}
let training_data = TrainingData {
sentences,
vocab,
reverse_vocab,
};
let config = TrainingConfig {
embedding_dim: 10, learning_rate: 0.1, epochs: 5,
batch_size: 32,
context_window: 2,
negative_samples: 5,
model_type: ModelType::SkipGram,
lr_schedule: LearningRateSchedule::Constant,
early_stopping: None,
l2_regularization: None,
gradient_clip: None,
};
println!("Training with config: {:?}", config);
let mut model = EmbeddingModel::new(config, training_data.vocab.len());
model.train(&training_data)?;
if let Some(similarity) = model.similarity("fox", "dog", &training_data) {
println!("Similarity between 'fox' and 'dog': {:.4}", similarity);
} else {
println!("Could not calculate similarity (words not in vocabulary)");
}
if let Some(similarity) = model.similarity("quick", "fox", &training_data) {
println!("Similarity between 'quick' and 'fox': {:.4}", similarity);
} else {
println!("Could not calculate similarity (words not in vocabulary)");
}
model.save_embeddings("demo_embeddings.txt", &training_data)?;
println!("Embeddings saved to demo_embeddings.txt");
Ok(())
}