pub use crate::error::SpecIdError;
use unicode_normalization::UnicodeNormalization;
use xchecker_redaction;
/// Turns a user-supplied spec ID into a restricted ASCII form.
///
/// Processing steps, in order:
/// 1. The input is NFKC-normalized.
/// 2. Every character that is not an ASCII letter or digit, `.`, `_` or `-`
///    is replaced by `_`.
/// 3. Each `..` pair is rewritten to `__`.
///
/// When the result differs from `id`, a warning is written to stderr. Both
/// strings are passed through `xchecker_redaction::redact_user_string` before
/// being printed.
///
/// # Errors
///
/// * [`SpecIdError::Empty`] if the sanitized string is empty.
/// * [`SpecIdError::OnlyInvalidCharacters`] if the sanitized string consists
///   solely of `_` characters.
pub fn sanitize_spec_id(id: &str) -> Result<String, SpecIdError> {
    /// Characters that survive sanitization unchanged.
    fn is_allowed(ch: char) -> bool {
        ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-')
    }

    // Normalize and substitute in one sweep over the NFKC stream.
    let mut cleaned = String::new();
    for ch in id.nfkc() {
        cleaned.push(if is_allowed(ch) { ch } else { '_' });
    }

    // `str::replace` works on non-overlapping matches and the replacement
    // contains no dots, so one pass cannot leave a ".." pair behind.
    // NOTE(review): presumably this guards against parent-directory path
    // components; confirm against callers.
    if cleaned.contains("..") {
        cleaned = cleaned.replace("..", "__");
    }

    if cleaned.is_empty() {
        return Err(SpecIdError::Empty);
    }

    // At this point every character is either allowed or '_', so "no
    // alphanumeric, '.' or '-' present" is the same as "all underscores".
    if cleaned.chars().all(|ch| ch == '_') {
        return Err(SpecIdError::OnlyInvalidCharacters);
    }

    if cleaned != id {
        eprintln!(
            "Warning: spec ID sanitized from '{redacted_original}' to '{redacted_sanitized}'",
            redacted_original = xchecker_redaction::redact_user_string(id),
            redacted_sanitized = xchecker_redaction::redact_user_string(&cleaned),
        );
    }

    Ok(cleaned)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::UserFriendlyError;

    /// Both error variants expose a message, a context and suggestions.
    #[test]
    fn test_spec_id_error_user_friendly() {
        let empty_err = SpecIdError::Empty;
        assert!(!empty_err.user_message().is_empty());
        assert!(empty_err.context().is_some());
        assert!(!empty_err.suggestions().is_empty());

        let invalid_err = SpecIdError::OnlyInvalidCharacters;
        assert!(!invalid_err.user_message().is_empty());
        assert!(invalid_err.context().is_some());
        assert!(!invalid_err.suggestions().is_empty());

        let suggestions = invalid_err.suggestions();
        assert!(suggestions.iter().any(|s| s.contains("Example:")));
    }

    /// IDs made only of allowed characters pass through untouched.
    #[test]
    fn test_valid_spec_id_unchanged() {
        assert_eq!(sanitize_spec_id("my-spec").unwrap(), "my-spec");
        assert_eq!(sanitize_spec_id("my_spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my.spec").unwrap(), "my.spec");
        assert_eq!(sanitize_spec_id("MySpec123").unwrap(), "MySpec123");
        assert_eq!(sanitize_spec_id("spec-123_v2.0").unwrap(), "spec-123_v2.0");
    }

    /// Each disallowed ASCII character becomes exactly one underscore.
    #[test]
    fn test_invalid_characters_replaced() {
        assert_eq!(sanitize_spec_id("my spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my/spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\\spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my:spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my*spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my?spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\"spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my<spec>").unwrap(), "my_spec_");
        assert_eq!(sanitize_spec_id("my|spec").unwrap(), "my_spec");
        assert_eq!(
            sanitize_spec_id("my!@#$%^&*()spec").unwrap(),
            "my__________spec"
        );
    }

    /// Compatibility variants of ASCII characters fold to ASCII under NFKC.
    #[test]
    fn test_unicode_confusables_normalized() {
        // Plain ASCII passes through unchanged.
        assert_eq!(sanitize_spec_id("my-spec").unwrap(), "my-spec");
        assert_eq!(sanitize_spec_id("MySpec123").unwrap(), "MySpec123");

        // Full-width forms (U+FF01..U+FF5E), written as escapes so the test
        // does not depend on how the source file is encoded or displayed.
        assert_eq!(
            sanitize_spec_id("\u{FF4D}\u{FF59}\u{FF0D}\u{FF53}\u{FF50}\u{FF45}\u{FF43}").unwrap(),
            "my-spec"
        );
        assert_eq!(
            sanitize_spec_id(
                "\u{FF2D}\u{FF59}\u{FF33}\u{FF50}\u{FF45}\u{FF43}\u{FF11}\u{FF12}\u{FF13}"
            )
            .unwrap(),
            "MySpec123"
        );

        // Enclosed alphanumerics decompose to their base letters.
        let result = sanitize_spec_id("ⓜⓨ⒮ⓟⓔⓒ").unwrap();
        assert!(result.contains('m'));
        assert!(result.contains('y'));
        assert!(result.contains('s'));
        assert!(result.contains('p'));
        assert!(result.contains('e'));
        assert!(result.contains('c'));
    }

    /// Control characters are not in the allowed set and become underscores.
    #[test]
    fn test_control_characters_replaced() {
        assert_eq!(sanitize_spec_id("my\nspec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\tspec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\rspec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\x00spec").unwrap(), "my_spec");
        assert_eq!(sanitize_spec_id("my\x1Fspec").unwrap(), "my_spec");
    }

    /// Whitespace is replaced one-for-one; runs are not collapsed or trimmed.
    #[test]
    fn test_whitespace_replaced() {
        assert_eq!(sanitize_spec_id("my spec").unwrap(), "my_spec");
        // `\x20` spells out each space so the run length is unambiguous:
        // two spaces in, two underscores out.
        assert_eq!(sanitize_spec_id("my\x20\x20spec").unwrap(), "my__spec");
        assert_eq!(
            sanitize_spec_id("\x20\x20my-spec\x20\x20").unwrap(),
            "__my-spec__"
        );
        // NFKC maps NO-BREAK SPACE to an ordinary space, which is then replaced.
        assert_eq!(sanitize_spec_id("my\u{00A0}spec").unwrap(), "my_spec");
    }

    /// The empty string is rejected with `Empty`.
    #[test]
    fn test_empty_id_rejected() {
        assert!(matches!(sanitize_spec_id(""), Err(SpecIdError::Empty)));
    }

    /// Inputs that sanitize to nothing but underscores are rejected.
    #[test]
    fn test_only_invalid_characters_rejected() {
        assert!(matches!(
            sanitize_spec_id("!!!"),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
        assert!(matches!(
            sanitize_spec_id(" "),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
        assert!(matches!(
            sanitize_spec_id("@#$%"),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
    }

    /// Valid characters are kept in place while invalid neighbours are replaced.
    #[test]
    fn test_mixed_valid_invalid() {
        assert_eq!(sanitize_spec_id("my-spec!@#").unwrap(), "my-spec___");
        assert_eq!(sanitize_spec_id("!!!my-spec").unwrap(), "___my-spec");
        assert_eq!(sanitize_spec_id("my!!!spec").unwrap(), "my___spec");
    }

    /// Each emoji is a single `char` and yields a single underscore.
    #[test]
    fn test_unicode_emoji_replaced() {
        assert_eq!(sanitize_spec_id("my-spec-🚀").unwrap(), "my-spec-_");
        assert_eq!(sanitize_spec_id("🎉party🎊").unwrap(), "_party_");
    }

    /// Non-ASCII letters are not allowed, even though they are alphabetic.
    #[test]
    fn test_unicode_letters_replaced() {
        assert_eq!(sanitize_spec_id("café").unwrap(), "caf_");
        assert_eq!(sanitize_spec_id("naïve").unwrap(), "na_ve");
        assert!(matches!(
            sanitize_spec_id("日本語"),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
    }

    /// Long inputs are handled without truncation.
    #[test]
    fn test_long_spec_id() {
        let long_id = "a".repeat(200);
        assert_eq!(sanitize_spec_id(&long_id).unwrap(), long_id);

        let long_invalid = "!".repeat(200);
        assert!(matches!(
            sanitize_spec_id(&long_invalid),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
    }

    /// Single-character and punctuation-only inputs.
    #[test]
    fn test_edge_cases() {
        assert_eq!(sanitize_spec_id("a").unwrap(), "a");
        assert_eq!(sanitize_spec_id("1").unwrap(), "1");
        assert_eq!(sanitize_spec_id("-").unwrap(), "-");
        assert_eq!(sanitize_spec_id(".").unwrap(), ".");
        // The first two dots form a ".." pair and are rewritten; the third
        // dot is left over and survives.
        assert_eq!(sanitize_spec_id("...").unwrap(), "__.");
        assert_eq!(sanitize_spec_id("---").unwrap(), "---");
        // Underscores alone do not count as meaningful content.
        assert!(matches!(
            sanitize_spec_id("_"),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
        assert!(matches!(
            sanitize_spec_id("___"),
            Err(SpecIdError::OnlyInvalidCharacters)
        ));
    }

    /// Compatibility characters are decomposed before filtering.
    #[test]
    fn test_nfkc_normalization() {
        assert_eq!(sanitize_spec_id("file").unwrap(), "file");
        // U+FB01 LATIN SMALL LIGATURE FI expands to "fi".
        assert_eq!(sanitize_spec_id("\u{FB01}le").unwrap(), "file");
        assert_eq!(sanitize_spec_id("spec²").unwrap(), "spec2");
        // U+2103 DEGREE CELSIUS expands to "°C"; the degree sign is replaced.
        assert_eq!(sanitize_spec_id("temp℃").unwrap(), "temp_C");
    }
}