/// Size counters and ranking metrics reported together as one value.
///
/// NOTE(review): the code that fills this struct is outside this view; the
/// per-field descriptions below are inferred from the field names — confirm
/// against the producer.
#[derive(Debug, Clone)]
pub struct ValidationResult {
/// Presumably the number of items in the training split.
pub train_size: usize,
/// Presumably the number of items in the held-out test split.
pub test_size: usize,
/// Presumably how many test items were actually scored.
pub evaluated: usize,
/// Ranking metrics for the run; `RankingMetrics` is declared elsewhere.
pub metrics: RankingMetrics,
}
#[cfg(test)]
mod tests {
use super::*;
/// Trains a model on a short git history and checks that the prefix `"git "`
/// yields at least one suggestion, one of which mentions `status`.
#[test]
fn test_train_and_suggest() {
    let history: Vec<String> = [
        "git status",
        "git commit -m test",
        "git push",
        "git status",
        "git log",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let mut model = MarkovModel::new(3);
    model.train(&history);

    let ranked = model.suggest("git ", 3);
    assert!(!ranked.is_empty());

    let mentions_status = ranked.iter().any(|(text, _)| text.contains("status"));
    assert!(mentions_status);
}
/// After training on two `ls` invocations the model must hold at least one
/// n-gram and report a vocabulary size of exactly 2.
#[test]
fn test_ngram_counts() {
    let mut model = MarkovModel::new(2);
    let history = vec![String::from("ls -la"), String::from("ls -la /tmp")];
    model.train(&history);

    let stored_ngrams = model.ngram_count();
    assert!(stored_ngrams > 0);

    let vocabulary = model.vocab_size();
    assert_eq!(vocabulary, 2);
}
/// A half-typed second token (`"git c"`) must be completed: suggestions are
/// non-empty, every one extends the typed prefix, and at least one of
/// `commit` / `checkout` / `clone` shows up among them.
#[test]
fn test_partial_token_completion() {
    let history: Vec<String> = [
        "git commit -m test",
        "git checkout main",
        "git clone url",
        "git status",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let mut model = MarkovModel::new(3);
    model.train(&history);

    let completions = model.suggest("git c", 5);
    assert!(
        !completions.is_empty(),
        "Should have suggestions for 'git c'"
    );

    // Every completion has to extend what was already typed.
    completions.iter().for_each(|(candidate, _)| {
        assert!(
            candidate.starts_with("git c"),
            "Suggestion '{}' should start with 'git c'",
            candidate
        );
    });

    // Glue all completions together and search the combined text once.
    let mut joined = String::new();
    for (candidate, _) in &completions {
        joined.push_str(candidate);
    }
    let expected_words = ["commit", "checkout", "clone"];
    let found_expected = expected_words.iter().any(|word| joined.contains(*word));
    assert!(
        found_expected,
        "Should suggest commit/checkout/clone, got: {:?}",
        completions
    );
}
/// A flag glued onto the preceding word (`commit-m`, `add-A`) must be reported
/// as corrupted, while a properly spaced flag and a hyphenated branch name
/// must not.
#[test]
fn test_is_corrupted_command() {
    // (command, failure message) pairs that must be flagged.
    let must_flag = [
        ("git commit-m test", "Should detect 'commit-m' as corrupted"),
        ("git add-A", "Should detect 'add-A' as corrupted"),
    ];
    for &(cmd, why) in &must_flag {
        assert!(MarkovModel::is_corrupted_command(cmd), "{}", why);
    }

    // (command, failure message) pairs that must be accepted.
    let must_accept = [
        (
            "git commit -m test",
            "Should NOT detect valid 'commit -m' as corrupted",
        ),
        (
            "git checkout feature-branch",
            "Should NOT detect 'feature-branch' as corrupted",
        ),
    ];
    for &(cmd, why) in &must_accept {
        assert!(!MarkovModel::is_corrupted_command(cmd), "{}", why);
    }
}
/// Even when the training data contains the corrupted entry
/// `git commit-m broken`, no suggestion for `"git co"` may contain `commit-m`.
#[test]
fn test_partial_token_filters_corrupted() {
    let history = vec![
        String::from("git commit -m test"),
        // Deliberately corrupted entry: the flag is glued onto the subcommand.
        String::from("git commit-m broken"),
        String::from("git checkout main"),
    ];

    let mut model = MarkovModel::new(3);
    model.train(&history);

    let completions = model.suggest("git co", 5);
    completions.iter().for_each(|(candidate, _)| {
        assert!(
            !candidate.contains("commit-m"),
            "Should not suggest corrupted 'commit-m', got: {}",
            candidate
        );
    });
}
/// With a single typed character of the second token (`"git s"`), all
/// suggestions must extend that prefix and the first one must contain
/// `status`, the subcommand that appears most often in the training data.
#[test]
fn test_partial_token_single_char() {
    // `git status` is listed twice; `stash` and `show` once each.
    let history: Vec<String> = ["git status", "git status", "git stash", "git show"]
        .iter()
        .map(|line| line.to_string())
        .collect();

    let mut model = MarkovModel::new(3);
    model.train(&history);

    let ranked = model.suggest("git s", 5);
    assert!(!ranked.is_empty());

    ranked.iter().for_each(|(candidate, _)| {
        assert!(
            candidate.starts_with("git s"),
            "Expected 'git s*', got: {}",
            candidate
        );
    });

    // Safe to index: non-emptiness was asserted above.
    let top = &ranked[0].0;
    assert!(
        top.contains("status"),
        "Most frequent 'status' should be first, got: {}",
        top
    );
}
/// Compares the prefix `"git "` (trailing space) with `"git"` (no space):
/// the former must offer `git status` or `git commit`, and everything offered
/// for the latter must still start with `git`.
#[test]
fn test_trailing_space_vs_no_space() {
    let history = vec![
        String::from("git status"),
        String::from("grep pattern"),
        String::from("git commit"),
    ];

    let mut model = MarkovModel::new(3);
    model.train(&history);

    let after_space = model.suggest("git ", 5);
    let offers_subcommand = after_space
        .iter()
        .any(|(text, _)| matches!(text.as_str(), "git status" | "git commit"));
    assert!(offers_subcommand);

    let bare_prefix = model.suggest("git", 5);
    for (text, _) in &bare_prefix {
        assert!(text.starts_with("git"));
    }
}
/// Commands containing two consecutive spaces must be reported as corrupted,
/// while the same command written with a single space must be accepted.
#[test]
fn test_is_corrupted_double_spaces() {
    // NOTE: the two spaces inside the next two literals are intentional and
    // are the whole point of the test. With single spaces the second case
    // would be the same input as the "valid" case below, and the test would
    // contradict itself.
    assert!(
        MarkovModel::is_corrupted_command("cargo-lambda  help"),
        "Should detect double spaces as corrupted"
    );
    assert!(
        MarkovModel::is_corrupted_command("git  status"),
        "Should detect double spaces as corrupted"
    );
    // Control case: exactly one space between the tokens.
    assert!(
        !MarkovModel::is_corrupted_command("git status"),
        "Single space is valid"
    );
}
/// Commands ending in one or more backslashes must be reported as corrupted;
/// the same path without the backslash must be accepted.
#[test]
fn test_is_corrupted_trailing_backslash() {
    // (command, failure message) pairs that must be flagged.
    let must_flag = [
        (
            "git rm -r --cached vendor/\\",
            "Should detect trailing backslash",
        ),
        ("cargo lambda deploy \\\\", "Should detect trailing escape"),
    ];
    for &(cmd, why) in &must_flag {
        assert!(MarkovModel::is_corrupted_command(cmd), "{}", why);
    }

    let clean_path_cmd = "git rm -r --cached vendor/";
    assert!(
        !MarkovModel::is_corrupted_command(clean_path_cmd),
        "Path without backslash is valid"
    );
}
/// Misspelled program names (`gitr`, `giti`, `cargoo`) must be reported as
/// corrupted; the correctly spelled `git` and `cargo` must be accepted.
#[test]
fn test_is_corrupted_typos() {
    // (command, failure message) pairs that must be flagged.
    let must_flag = [
        ("gitr push", "Should detect 'gitr' as typo"),
        ("giti pull", "Should detect 'giti' as typo"),
        ("cargoo build", "Should detect 'cargoo' as typo"),
    ];
    for &(cmd, why) in &must_flag {
        assert!(MarkovModel::is_corrupted_command(cmd), "{}", why);
    }

    // Correct spellings share one failure message.
    let must_accept = ["git push", "cargo build"];
    for &cmd in &must_accept {
        assert!(
            !MarkovModel::is_corrupted_command(cmd),
            "Valid command should pass"
        );
    }
}
}
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
use std::fs;
use tempfile::NamedTempFile;
/// Strategy that produces a single shell command string.
///
/// Each generated value is either one of ten fixed commands (git, cargo,
/// docker, kubectl, npm, ls, cd) or a synthetic command matched by the regex
/// `[a-z]{3,10}( -[a-z])?( [a-z]{2,8})?`: a 3–10 letter name, optionally
/// followed by a one-letter flag and/or a 2–8 letter argument.
fn arb_command() -> impl Strategy<Value = String> {
    prop_oneof![
        Just("git status".to_string()),
        Just("git commit -m 'test'".to_string()),
        Just("git push origin main".to_string()),
        Just("cargo build --release".to_string()),
        Just("cargo test".to_string()),
        Just("docker run -it ubuntu".to_string()),
        Just("kubectl get pods".to_string()),
        Just("npm install".to_string()),
        Just("ls -la".to_string()),
        Just("cd ..".to_string()),
        // A string literal is itself a regex strategy yielding `String`, so
        // it can be listed directly without an identity `prop_map`.
        "[a-z]{3,10}( -[a-z])?( [a-z]{2,8})?",
    ]
}
/// Strategy that produces a list of commands drawn from `arb_command`.
///
/// The list length lies in the half-open range `min..max`, i.e. `max` itself
/// is excluded.
fn arb_commands(min: usize, max: usize) -> impl Strategy<Value = Vec<String>> {
    let length_range = min..max;
    let element = arb_command();
    proptest::collection::vec(element, length_range)
}
proptest! {
#[test]
fn prop_roundtrip_preserves_data(commands in arb_commands(5, 50)) {
let mut model = MarkovModel::new(3);
model.train(&commands);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let loaded = MarkovModel::load(file.path()).expect("load");
prop_assert_eq!(loaded.n, model.n, "n-gram size mismatch");
prop_assert_eq!(loaded.total_commands, model.total_commands, "command count mismatch");
prop_assert_eq!(loaded.command_freq.len(), model.command_freq.len(), "vocab mismatch");
}
#[test]
fn prop_model_type_is_ngram_lm(commands in arb_commands(3, 20)) {
let mut model = MarkovModel::new(3);
model.train(&commands);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let bytes = fs::read(file.path()).expect("read");
let model_type = u16::from_le_bytes([bytes[6], bytes[7]]);
prop_assert_eq!(model_type, 0x0010, "Model type should be NgramLm (0x0010)");
}
#[test]
fn prop_magic_is_aprn(commands in arb_commands(3, 20)) {
let mut model = MarkovModel::new(3);
model.train(&commands);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let bytes = fs::read(file.path()).expect("read");
prop_assert_eq!(&bytes[0..4], b"APRN", "Magic should be APRN");
}
#[test]
fn prop_command_freq_preserved_after_roundtrip(commands in arb_commands(10, 50)) {
let mut model = MarkovModel::new(3);
model.train(&commands);
let before_freq = model.command_freq.clone();
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let loaded = MarkovModel::load(file.path()).expect("load");
prop_assert_eq!(loaded.command_freq, before_freq, "command_freq should match after roundtrip");
}
#[test]
fn prop_ngram_size_preserved(n in 2usize..=5) {
let commands: Vec<String> = vec![
"git status".to_string(),
"git commit".to_string(),
"cargo build".to_string(),
];
let mut model = MarkovModel::new(n);
model.train(&commands);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let loaded = MarkovModel::load(file.path()).expect("load");
prop_assert_eq!(loaded.n, n, "n-gram size should be preserved");
}
#[test]
fn prop_empty_model_roundtrip(n in 2usize..=5) {
let model = MarkovModel::new(n);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let loaded = MarkovModel::load(file.path()).expect("load");
prop_assert_eq!(loaded.n, n);
prop_assert_eq!(loaded.total_commands, 0);
prop_assert!(loaded.command_freq.is_empty());
}
#[test]
fn prop_file_size_reasonable(commands in arb_commands(10, 100)) {
let mut model = MarkovModel::new(3);
model.train(&commands);
let file = NamedTempFile::new().expect("temp file");
model.save(file.path()).expect("save");
let metadata = fs::metadata(file.path()).expect("metadata");
let size = metadata.len();
prop_assert!(size < 1_000_000, "File too large: {} bytes", size);
prop_assert!(size > 100, "File too small: {} bytes", size);
}
}
}