mod words;
use rand::Rng;
use std::fmt;
use std::str::FromStr;
use thiserror::Error;
pub use words::{ADJECTIVES, NOUNS, SHORT_WORDS};
/// Errors produced when generating or parsing a florid identifier.
#[derive(Debug, Error)]
pub enum FloridError {
    /// Requested generation length falls outside `[MIN_LENGTH, MAX_LENGTH]`.
    #[error("length must be between {min} and {max}, got {got}")]
    InvalidLength { min: usize, max: usize, got: usize },
    /// Input string failed validation during parsing (bad length or character).
    #[error("invalid florid format: {0}")]
    InvalidFormat(String),
    /// No id of exactly the requested length could be assembled within the
    /// retry budget (length, attempts).
    #[error("unable to generate id of exact length {0} after {1} attempts")]
    GenerationFailed(usize, usize),
}
/// Smallest id length that can be generated or parsed.
pub const MIN_LENGTH: usize = 5;
/// Largest id length that can be generated or parsed.
pub const MAX_LENGTH: usize = 36;
/// Lengths below this threshold use the digit-separated short-word format;
/// lengths at or above it use hyphen-separated words.
const SHORT_THRESHOLD: usize = 10;
/// Upper bound on generation retries before giving up with `GenerationFailed`.
const MAX_ATTEMPTS: usize = 1000;
/// A validated, normalized human-readable identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Florid {
    // Invariant (enforced by the `FromStr` impl): lowercase ASCII letters,
    // digits, and hyphens only; byte length within [MIN_LENGTH, MAX_LENGTH].
    value: String,
}
impl Florid {
    /// Borrow the identifier as a string slice.
    pub fn as_str(&self) -> &str {
        self.value.as_str()
    }

    /// Byte length of the identifier.
    pub fn len(&self) -> usize {
        self.value.len()
    }

    /// True when the identifier contains no characters.
    pub fn is_empty(&self) -> bool {
        self.value.is_empty()
    }

    /// Iterate over the word components, treating hyphens and digits as
    /// separators and skipping any empty fragments between them.
    pub fn words(&self) -> impl Iterator<Item = &str> {
        let is_separator = |c: char| c == '-' || c.is_ascii_digit();
        self.value
            .split(is_separator)
            .filter(|part| !part.is_empty())
    }

    /// Number of word components in the identifier.
    pub fn word_count(&self) -> usize {
        self.words().count()
    }
}
impl fmt::Display for Florid {
    /// Render the identifier exactly as its underlying string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.value)
    }
}
impl FromStr for Florid {
    type Err = FloridError;

    /// Parse a string into a `Florid`, lowercasing it first and then
    /// validating its length and character set.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let len = normalized.len();

        // Length is checked on the normalized (lowercased) form.
        if !(MIN_LENGTH..=MAX_LENGTH).contains(&len) {
            return Err(FloridError::InvalidFormat(format!(
                "length {} is outside valid range [{}, {}]",
                len, MIN_LENGTH, MAX_LENGTH
            )));
        }

        // Reject on the first character outside the allowed alphabet.
        let offending = normalized
            .chars()
            .find(|&c| !c.is_ascii_lowercase() && c != '-' && !c.is_ascii_digit());
        if let Some(c) = offending {
            return Err(FloridError::InvalidFormat(format!(
                "invalid character '{}' - only lowercase letters, digits, and hyphens allowed",
                c
            )));
        }

        Ok(Florid { value: normalized })
    }
}
impl AsRef<str> for Florid {
    /// Cheap borrow of the identifier's string content.
    fn as_ref(&self) -> &str {
        self.value.as_str()
    }
}
/// Generate a florid id of exactly `length` characters using the
/// thread-local RNG. See [`florid_with_rng`] for details and errors.
pub fn florid(length: usize) -> Result<String, FloridError> {
    let mut rng = rand::thread_rng();
    florid_with_rng(length, &mut rng)
}
/// Generate a florid id of exactly `length` characters using the supplied RNG.
///
/// Short lengths (below `SHORT_THRESHOLD`) use the digit-separated word
/// format; longer lengths use hyphen-separated words. Generation is retried
/// up to `MAX_ATTEMPTS` times until a candidate of the exact length appears.
///
/// # Errors
/// - `InvalidLength` when `length` is outside `[MIN_LENGTH, MAX_LENGTH]`.
/// - `GenerationFailed` when no exact-length id is found within the budget.
pub fn florid_with_rng<R: Rng>(length: usize, rng: &mut R) -> Result<String, FloridError> {
    if !(MIN_LENGTH..=MAX_LENGTH).contains(&length) {
        return Err(FloridError::InvalidLength {
            min: MIN_LENGTH,
            max: MAX_LENGTH,
            got: length,
        });
    }
    for _ in 0..MAX_ATTEMPTS {
        let candidate = match length < SHORT_THRESHOLD {
            true => generate_short_id(length, rng),
            false => generate_standard_id(length, rng),
        };
        // Accept only candidates that hit the target length exactly.
        match candidate {
            Some(id) if id.len() == length => return Ok(id),
            _ => continue,
        }
    }
    Err(FloridError::GenerationFailed(length, MAX_ATTEMPTS))
}
/// Build a short id of the form `<word><digit><word>` with exactly
/// `target_len` characters, or `None` if no word-length pair fits.
fn generate_short_id<R: Rng>(target_len: usize, rng: &mut R) -> Option<String> {
    let buckets = bucket_words_by_length(SHORT_WORDS);
    let mut lens: Vec<usize> = buckets.keys().copied().collect();
    lens.sort();

    // All (first, second) length pairs where word + digit + word == target.
    // Nested in sorted order so the combo indexing is deterministic.
    let combos: Vec<(usize, usize)> = lens
        .iter()
        .flat_map(|&first| lens.iter().map(move |&second| (first, second)))
        .filter(|&(first, second)| first + 1 + second == target_len)
        .collect();
    if combos.is_empty() {
        return None;
    }

    let (first_len, second_len) = combos[rng.gen_range(0..combos.len())];
    let first_choices = buckets.get(&first_len)?;
    let second_choices = buckets.get(&second_len)?;
    if first_choices.is_empty() || second_choices.is_empty() {
        return None;
    }

    let w1 = first_choices[rng.gen_range(0..first_choices.len())];
    let w2 = second_choices[rng.gen_range(0..second_choices.len())];
    let digit = rng.gen_range(0..10);
    Some(format!("{}{}{}", w1, digit, w2))
}
/// Build a hyphen-separated id of exactly `target_len` characters from the
/// combined adjective/noun vocabulary, or `None` if no word-length split fits.
fn generate_standard_id<R: Rng>(target_len: usize, rng: &mut R) -> Option<String> {
    let pool: Vec<&str> = ADJECTIVES.iter().chain(NOUNS.iter()).copied().collect();
    let buckets = bucket_words_by_length(&pool);

    // More words for longer targets so individual words stay pronounceable.
    let word_count = match target_len {
        10..=15 => 2,
        16..=24 => 3,
        _ => 4,
    };
    let separators = word_count - 1;
    let letter_budget = target_len - separators;

    let lengths = find_word_lengths(letter_budget, word_count, &buckets, rng)?;
    let mut parts = Vec::with_capacity(word_count);
    for len in lengths {
        // A chosen length must map to a non-empty bucket.
        let candidates = buckets.get(&len).filter(|list| !list.is_empty())?;
        parts.push(candidates[rng.gen_range(0..candidates.len())]);
    }
    Some(parts.join("-"))
}
/// Randomly choose `count` word lengths (drawn only from non-empty buckets in
/// `words_by_len`) that sum to exactly `total`.
///
/// Uses bounded rejection sampling: each position's candidate length is
/// constrained so that the remaining positions can, in principle, still be
/// filled. Returns `None` when the target is infeasible or no valid
/// assignment is found within 100 attempts.
fn find_word_lengths<R: Rng>(
    total: usize,
    count: usize,
    words_by_len: &std::collections::HashMap<usize, Vec<&str>>,
    rng: &mut R,
) -> Option<Vec<usize>> {
    // Only lengths for which words actually exist.
    let mut available_lens: Vec<usize> = words_by_len
        .iter()
        .filter(|(_, v)| !v.is_empty())
        .map(|(&k, _)| k)
        .collect();
    available_lens.sort();
    if available_lens.is_empty() {
        return None;
    }
    let min_word_len = *available_lens.iter().min().unwrap_or(&3);
    let max_word_len = *available_lens.iter().max().unwrap_or(&12);
    // Fast feasibility check: `total` must sit between all-min and all-max sums.
    if total < count * min_word_len || total > count * max_word_len {
        return None;
    }
    for _ in 0..100 {
        let mut lengths = Vec::with_capacity(count);
        let mut remaining = total;
        for i in 0..count {
            let words_left = count - i;
            // Reserve at least the minimum length for every later position...
            let min_needed = (words_left - 1) * min_word_len;
            let max_allowed = remaining.saturating_sub(min_needed);
            // ...and force this pick large enough that later maximum-length
            // words can still consume the remainder.
            let min_for_this = min_word_len.max(remaining.saturating_sub((words_left - 1) * max_word_len));
            let max_for_this = max_word_len.min(max_allowed);
            if min_for_this > max_for_this {
                // Bounds crossed: this attempt cannot complete; restart.
                break;
            }
            let valid_lens: Vec<usize> = available_lens
                .iter()
                .copied()
                .filter(|&l| l >= min_for_this && l <= max_for_this)
                .collect();
            if valid_lens.is_empty() {
                // No available bucket fits the bounds; restart the attempt.
                break;
            }
            let len = valid_lens[rng.gen_range(0..valid_lens.len())];
            lengths.push(len);
            remaining -= len;
        }
        if lengths.len() == count && remaining == 0 {
            return Some(lengths);
        }
    }
    None
}
/// Group words into buckets keyed by their byte length.
///
/// The returned map borrows the same `&str`s as the input, so its lifetime is
/// tied to the words themselves (`'a`), not to the slice that holds them —
/// the previous `&'a [&'a str]` signature needlessly coupled the two.
fn bucket_words_by_length<'a>(words: &[&'a str]) -> std::collections::HashMap<usize, Vec<&'a str>> {
    let mut buckets: std::collections::HashMap<usize, Vec<&'a str>> =
        std::collections::HashMap::new();
    for &word in words {
        // `or_default()` is the idiomatic form of `or_insert_with(Vec::new)`.
        buckets.entry(word.len()).or_default().push(word);
    }
    buckets
}
pub fn is_valid(s: &str) -> bool {
Florid::from_str(s).is_ok()
}
/// Parse `s` and return its normalized (lowercase) form, or `None` if invalid.
pub fn normalize(s: &str) -> Option<String> {
    match Florid::from_str(s) {
        Ok(florid) => Some(florid.value),
        Err(_) => None,
    }
}
/// Bits of entropy contributed by a single word drawn uniformly from the
/// combined adjective + noun vocabulary.
pub fn entropy_bits() -> f64 {
    let vocabulary = (ADJECTIVES.len() + NOUNS.len()) as f64;
    vocabulary.log2()
}
/// Approximate probability (birthday bound, exponential form) of at least one
/// collision when drawing `num_ids` ids composed of `num_words` words each.
pub fn collision_probability(num_ids: u64, num_words: usize) -> f64 {
    let vocabulary = (ADJECTIVES.len() + NOUNS.len()) as f64;
    let id_space = vocabulary.powi(num_words as i32);
    let n = num_ids as f64;
    // P(collision) ≈ 1 - e^{-n(n-1) / (2 * space)}
    let exponent = -n * (n - 1.0) / (2.0 * id_space);
    1.0 - exponent.exp()
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    /// Fixed-seed RNG so generation tests are reproducible.
    fn seeded_rng() -> StdRng {
        StdRng::seed_from_u64(12345)
    }

    // Every length in the supported range must be achievable exactly.
    #[test]
    fn test_florid_returns_correct_length() {
        let mut rng = seeded_rng();
        for len in MIN_LENGTH..=MAX_LENGTH {
            let result = florid_with_rng(len, &mut rng);
            assert!(result.is_ok(), "Failed to generate florid of length {}", len);
            assert_eq!(result.unwrap().len(), len, "Length mismatch for target {}", len);
        }
    }

    // Out-of-range lengths are rejected with InvalidLength carrying the bounds.
    #[test]
    fn test_florid_rejects_invalid_lengths() {
        assert!(matches!(
            florid(4),
            Err(FloridError::InvalidLength { min: 5, max: 36, got: 4 })
        ));
        assert!(matches!(
            florid(37),
            Err(FloridError::InvalidLength { min: 5, max: 36, got: 37 })
        ));
        assert!(matches!(
            florid(0),
            Err(FloridError::InvalidLength { min: 5, max: 36, got: 0 })
        ));
    }

    // Generated ids never contain uppercase characters.
    #[test]
    fn test_florid_is_lowercase() {
        let mut rng = seeded_rng();
        for _ in 0..100 {
            let id = florid_with_rng(20, &mut rng).unwrap();
            assert_eq!(id, id.to_lowercase());
        }
    }

    // Alphabet check: only lowercase ASCII letters, digits, and hyphens.
    #[test]
    fn test_florid_uses_only_valid_characters() {
        let mut rng = seeded_rng();
        for len in MIN_LENGTH..=MAX_LENGTH {
            let id = florid_with_rng(len, &mut rng).unwrap();
            for c in id.chars() {
                assert!(
                    c.is_ascii_lowercase() || c == '-' || c.is_ascii_digit(),
                    "Invalid character '{}' in id '{}'", c, id
                );
            }
        }
    }

    // Lengths below SHORT_THRESHOLD use a digit separator and no hyphens.
    #[test]
    fn test_short_ids_use_digits_as_separators() {
        let mut rng = seeded_rng();
        for len in 5..10 {
            let id = florid_with_rng(len, &mut rng).unwrap();
            assert!(
                id.chars().any(|c| c.is_ascii_digit()),
                "Short id '{}' should contain a digit", id
            );
            assert!(
                !id.contains('-'),
                "Short id '{}' should not contain hyphens", id
            );
        }
    }

    // Lengths at or above SHORT_THRESHOLD use the hyphen-separated format.
    #[test]
    fn test_standard_ids_use_hyphens() {
        let mut rng = seeded_rng();
        for len in 10..=MAX_LENGTH {
            let id = florid_with_rng(len, &mut rng).unwrap();
            assert!(
                id.contains('-'),
                "Standard id '{}' (len={}) should contain hyphens", id, len
            );
        }
    }

    // Display renders the raw inner value.
    #[test]
    fn test_florid_struct_display() {
        let florid = Florid { value: "red-cat-dog".to_string() };
        assert_eq!(format!("{}", florid), "red-cat-dog");
    }

    // Round-trip: a valid string parses and exposes itself via as_str.
    #[test]
    fn test_florid_struct_parse() {
        let florid: Florid = "red-cat-dog".parse().unwrap();
        assert_eq!(florid.as_str(), "red-cat-dog");
    }

    // Parsing lowercases the input.
    #[test]
    fn test_florid_struct_parse_normalizes_case() {
        let florid: Florid = "RED-CAT-DOG".parse().unwrap();
        assert_eq!(florid.as_str(), "red-cat-dog");
    }

    // Too-short input and disallowed characters (underscore, space) are rejected.
    #[test]
    fn test_florid_struct_parse_rejects_invalid() {
        assert!(Florid::from_str("ab").is_err());
        assert!(Florid::from_str("hello_world").is_err());
        assert!(Florid::from_str("hello world").is_err());
    }

    // words() splits on hyphens (and digits) and drops empty fragments.
    #[test]
    fn test_florid_words() {
        let florid: Florid = "red-cat-dog".parse().unwrap();
        let words: Vec<&str> = florid.words().collect();
        assert_eq!(words, vec!["red", "cat", "dog"]);
    }

    #[test]
    fn test_florid_word_count() {
        let florid: Florid = "red-cat-dog".parse().unwrap();
        assert_eq!(florid.word_count(), 3);
    }

    #[test]
    fn test_is_valid() {
        assert!(is_valid("red-cat-dog"));
        assert!(is_valid("abc1def"));
        assert!(!is_valid("ab"));
        assert!(!is_valid("hello_world"));
    }

    #[test]
    fn test_normalize() {
        assert_eq!(normalize("RED-CAT-DOG"), Some("red-cat-dog".to_string()));
        assert_eq!(normalize("ab"), None);
    }

    // Identical seeds must reproduce the exact same id sequence.
    #[test]
    fn test_deterministic_with_seed() {
        let mut rng1 = StdRng::seed_from_u64(42);
        let mut rng2 = StdRng::seed_from_u64(42);
        let ids1: Vec<String> = (0..10).map(|_| florid_with_rng(20, &mut rng1).unwrap()).collect();
        let ids2: Vec<String> = (0..10).map(|_| florid_with_rng(20, &mut rng2).unwrap()).collect();
        assert_eq!(ids1, ids2);
    }

    // Different seeds should (with overwhelming probability) diverge.
    #[test]
    fn test_different_seeds_produce_different_ids() {
        let mut rng1 = StdRng::seed_from_u64(42);
        let mut rng2 = StdRng::seed_from_u64(43);
        let id1 = florid_with_rng(20, &mut rng1).unwrap();
        let id2 = florid_with_rng(20, &mut rng2).unwrap();
        assert_ne!(id1, id2);
    }

    // Sanity bounds on per-word entropy given the vocabulary size.
    #[test]
    fn test_entropy_bits_is_reasonable() {
        let bits = entropy_bits();
        assert!(bits > 10.0, "Entropy should be at least 10 bits per word");
        assert!(bits < 20.0, "Entropy should be less than 20 bits per word");
    }

    // Birthday-bound probability is monotonic in the number of ids drawn.
    #[test]
    fn test_collision_probability_increases_with_ids() {
        let p1 = collision_probability(1000, 3);
        let p2 = collision_probability(10000, 3);
        let p3 = collision_probability(100000, 3);
        assert!(p1 < p2);
        assert!(p2 < p3);
    }

    // ...and monotonically decreasing in the number of words per id.
    #[test]
    fn test_collision_probability_decreases_with_words() {
        let p2 = collision_probability(10000, 2);
        let p3 = collision_probability(10000, 3);
        let p4 = collision_probability(10000, 4);
        assert!(p2 > p3);
        assert!(p3 > p4);
    }

    // A modest sample at length 20 should have no collisions at all.
    #[test]
    fn test_uniqueness_sample() {
        let mut rng = seeded_rng();
        let mut ids = std::collections::HashSet::new();
        let sample_size = 1000;
        for _ in 0..sample_size {
            let id = florid_with_rng(20, &mut rng).unwrap();
            ids.insert(id);
        }
        assert_eq!(ids.len(), sample_size, "All generated IDs should be unique in sample");
    }

    // Loose uniformity check on the first word across 1000 generations.
    #[test]
    fn test_word_distribution_is_reasonable() {
        let mut rng = seeded_rng();
        let mut first_words = std::collections::HashMap::new();
        for _ in 0..1000 {
            let id = florid_with_rng(20, &mut rng).unwrap();
            let first_word = id.split('-').next().unwrap().to_string();
            *first_words.entry(first_word).or_insert(0) += 1;
        }
        let max_count = *first_words.values().max().unwrap();
        assert!(
            max_count < 100,
            "No single word should appear more than 10% of the time, but max was {}",
            max_count
        );
    }
}