use embedding::*;
/// Demo driver: trains an embedding model on a small hard-coded text.
///
/// Steps, in order:
/// 1. Split the sample text into sentences with `load_text_data`.
/// 2. Build the vocabulary pair with `build_vocab` and print up to ten entries
///    of the reverse vocabulary.
/// 3. Train an `EmbeddingModel` using a fixed `TrainingConfig`.
/// 4. Print the similarity for two word pairs.
/// 5. Write the embeddings to `demo_embeddings.txt`.
///
/// # Errors
///
/// Returns the error propagated by `train` or `save_embeddings`.
fn main() -> Result<(), String> {
    let corpus_text =
        "the quick brown fox jumps over the lazy dog. the fox is quick and the dog is lazy.";

    let corpus_sentences = load_text_data(corpus_text);
    println!("Loaded {} sentences", corpus_sentences.len());

    let (vocabulary, reverse_lookup) = build_vocab(&corpus_sentences);
    println!("Built vocabulary with {} words", vocabulary.len());

    // Show at most the first ten entries so the output stays short.
    println!("Sample vocabulary:");
    let preview_len = reverse_lookup.len().min(10);
    for idx in 0..preview_len {
        println!(" {}: {}", idx, reverse_lookup[idx]);
    }

    let dataset = TrainingData {
        sentences: corpus_sentences,
        vocab: vocabulary,
        reverse_vocab: reverse_lookup,
    };

    let settings = TrainingConfig {
        embedding_dim: 10,
        learning_rate: 0.1,
        epochs: 5,
        batch_size: 32,
        context_window: 2,
        negative_samples: 5,
        model_type: ModelType::SkipGram,
        lr_schedule: LearningRateSchedule::Constant,
        early_stopping: None,
        l2_regularization: None,
        dropout_rate: None,
    };
    println!("Training with config: {:?}", settings);

    // The config is moved into the model, so it is printed beforehand.
    let mut embedder = EmbeddingModel::new(settings, dataset.vocab.len());
    embedder.train(&dataset)?;

    match embedder.similarity("fox", "dog", &dataset) {
        Some(score) => println!("Similarity between 'fox' and 'dog': {:.4}", score),
        None => println!("Could not calculate similarity (words not in vocabulary)"),
    }

    match embedder.similarity("quick", "fox", &dataset) {
        Some(score) => println!("Similarity between 'quick' and 'fox': {:.4}", score),
        None => println!("Could not calculate similarity (words not in vocabulary)"),
    }

    embedder.save_embeddings("demo_embeddings.txt", &dataset)?;
    println!("Embeddings saved to demo_embeddings.txt");

    Ok(())
}