strut_deserialize/
slug.rs

1use serde::de::{Error, MapAccess};
2use serde_value::Value;
3use std::borrow::Borrow;
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::fmt::{Display, Formatter};
7use std::hash::{Hash, Hasher};
8use std::ops::Deref;
9
10pub mod map;
11
/// A normalized identifier built from arbitrary text.
///
/// Only the ASCII alphanumeric characters of the input are kept, and they are
/// forced to lowercase. Inputs such as `"SECTION_TITLE"`, `"sectiontitle"`,
/// and `"++SectionTitle"` therefore all produce the same normalized form. The
/// untouched input is stored next to the normalized form.
///
/// ## Ambiguity
///
/// Because punctuation is dropped, slugs that differ only by punctuation
/// cannot be told apart: `"re-sign"` (sign again) and `"resign"` (quit)
/// normalize identically, and so do `"Unit_S"` and `"units"`. This is an
/// accepted trade-off: slugs are expected to be chosen with the normalization
/// rules in mind.
#[derive(Debug, Clone)]
pub struct Slug {
    /// The input exactly as it was handed to [`Slug::new`].
    original: String,
    /// The ASCII alphanumeric characters of `original`, lowercased.
    normalized: String,
}

impl Slug {
    /// Creates a new [`Slug`] from the given [`String`]-like input.
    ///
    /// The input itself is stored unchanged; the normalized form is derived
    /// from it by keeping only ASCII alphanumeric characters and lowercasing
    /// them. Everything else (punctuation, whitespace, non-ASCII letters) is
    /// discarded, so the normalized form may be empty.
    pub fn new(slug: impl Into<String>) -> Self {
        let original: String = slug.into();

        let normalized = original
            .chars()
            .filter(char::is_ascii_alphanumeric)
            .map(|c| c.to_ascii_lowercase())
            .collect::<String>();

        Self {
            original,
            normalized,
        }
    }

    /// Returns the original, non-normalized string this [`Slug`] was created
    /// from.
    pub fn original(&self) -> &str {
        self.original.as_str()
    }

    /// Returns the normalized form of this [`Slug`]: lowercase ASCII
    /// alphanumeric characters only.
    pub fn normalized(&self) -> &str {
        self.normalized.as_str()
    }
}
60
61impl Slug {
62    /// Produces the same result as checking two [`Slug`]s for equivalence
63    /// without allocating any [`Slug`]s.
64    ///
65    /// Two slugs are equivalent when their ASCII alphanumeric characters match
66    /// pairwise (case-insensitively), while all other characters in both slugs
67    /// are ignored. Under these rules, the following versions of the same slug
68    /// are all equivalent:
69    ///
70    /// - `"MULTI_WORD_SLUG"`
71    /// - `"MultiWordSlug"`
72    /// - `"multiwordslug"`
73    /// - `"++multi-word-slug!"`
74    /// - etc.
75    pub fn eq_as_slugs(a: &str, b: &str) -> bool {
76        let mut iter_a = a.chars().filter(|&c| c.is_ascii_alphanumeric());
77        let mut iter_b = b.chars().filter(|&c| c.is_ascii_alphanumeric());
78
79        loop {
80            match (iter_a.next(), iter_b.next()) {
81                (Some(c1), Some(c2)) => {
82                    if !c1.eq_ignore_ascii_case(&c2) {
83                        return false;
84                    }
85                }
86                (None, None) => return true, // both sides reached the end
87                _ => return false, // one side reached the end, but the other still has valid characters
88            }
89        }
90    }
91
92    /// Produces the same result as comparing two [`Slug`]s for total ordering
93    /// without allocating any [`Slug`]s.
94    ///
95    /// Two slugs are totally ordered only by their ASCII alphanumeric
96    /// characters, pairwise (case-insensitively), while all other characters in
97    /// both slugs are ignored.
98    pub fn cmp_as_slugs(a: &str, b: &str) -> Ordering {
99        let mut iter_a = a.chars().filter(|&c| c.is_ascii_alphanumeric());
100        let mut iter_b = b.chars().filter(|&c| c.is_ascii_alphanumeric());
101
102        loop {
103            match (iter_a.next(), iter_b.next()) {
104                (Some(mut c1), Some(mut c2)) => {
105                    // Force copied chars on both sides to lowercase in-place
106                    c1.make_ascii_lowercase();
107                    c2.make_ascii_lowercase();
108
109                    // Now compare
110                    match c1.cmp(&c2) {
111                        Ordering::Equal => continue,
112                        non_eq => return non_eq,
113                    }
114                }
115                (None, None) => return Ordering::Equal,
116                (None, Some(_)) => return Ordering::Less,
117                (Some(_), None) => return Ordering::Greater,
118            }
119        }
120    }
121}
122
123impl Slug {
124    /// Consumes the given [`MapAccess`] and materializes it into a [`HashMap`].
125    /// Merges the map’s [`Value`]s at colliding [`Slug`] keys, but only if both
126    /// nested values are [maps](Value::Map); otherwise an error is returned.
127    ///
128    /// This is intended as a helper method to group up deserialization inputs
129    /// before deserializing the values and (presumably) putting them into a
130    /// [`SlugMap`](crate::SlugMap).
131    ///
132    /// ## Error
133    ///
134    /// Returns an error if the given input cannot be deserialized as a map, or
135    /// if at least one of the values at colliding keys is not a nested map.
136    pub fn group_map<'de, A>(mut input: A) -> Result<HashMap<Slug, Value>, A::Error>
137    where
138        A: MapAccess<'de>,
139    {
140        let mut merged_values: HashMap<Slug, Value> = if let Some(len) = input.size_hint() {
141            HashMap::with_capacity(len)
142        } else {
143            HashMap::new()
144        };
145
146        while let Some((next_key, next_value)) = input.next_entry::<String, Value>()? {
147            let next_slug = Slug::new(next_key);
148
149            if let Some((existing_slug, existing_value)) = merged_values.remove_entry(&next_slug) {
150                // Merge the values
151                let merged_value = match (existing_value, next_value) {
152                    // If both values are maps, merge them
153                    (Value::Map(mut existing_map), Value::Map(mut next_map)) => {
154                        // Order matters
155                        if existing_slug.original.len() >= next_slug.original.len() {
156                            existing_map.extend(next_map);
157                            Value::Map(existing_map)
158                        } else {
159                            next_map.extend(existing_map);
160                            Value::Map(next_map)
161                        }
162                    }
163
164                    // Otherwise, return an error
165                    (a, b) => {
166                        return Err(Error::custom(format!(
167                            "collision for key {}: cannot merge values of type {:?} and {:?}",
168                            existing_slug, a, b,
169                        )));
170                    }
171                };
172
173                // Pick the slug with the longest value
174                let merged_slug = if existing_slug.original.len() >= next_slug.original.len() {
175                    existing_slug
176                } else {
177                    next_slug
178                };
179
180                // Insert the merged value back
181                merged_values.insert(merged_slug, merged_value);
182            } else {
183                // Insert the next value
184                merged_values.insert(next_slug, next_value);
185            }
186        }
187
188        Ok(merged_values)
189    }
190}
191
192const _: () = {
193    impl PartialEq for Slug {
194        fn eq(&self, other: &Self) -> bool {
195            self.normalized.eq(&other.normalized)
196        }
197    }
198
199    impl Eq for Slug {}
200
201    impl PartialOrd for Slug {
202        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
203            self.normalized.partial_cmp(&other.normalized)
204        }
205    }
206
207    impl Ord for Slug {
208        fn cmp(&self, other: &Self) -> Ordering {
209            self.normalized.cmp(&other.normalized)
210        }
211    }
212
213    impl Hash for Slug {
214        fn hash<H: Hasher>(&self, state: &mut H) {
215            self.normalized.hash(state);
216        }
217    }
218
219    impl From<String> for Slug {
220        fn from(value: String) -> Self {
221            Self::new(value)
222        }
223    }
224
225    impl From<&str> for Slug {
226        fn from(value: &str) -> Self {
227            Self::new(value)
228        }
229    }
230
231    impl Borrow<str> for Slug {
232        fn borrow(&self) -> &str {
233            &self.normalized
234        }
235    }
236
237    impl AsRef<str> for Slug {
238        fn as_ref(&self) -> &str {
239            &self.normalized
240        }
241    }
242
243    impl Deref for Slug {
244        type Target = str;
245
246        fn deref(&self) -> &Self::Target {
247            &self.normalized
248        }
249    }
250
251    impl From<Slug> for String {
252        fn from(value: Slug) -> Self {
253            value.normalized
254        }
255    }
256
257    impl Display for Slug {
258        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
259            f.write_str(&self.normalized)
260        }
261    }
262};
263
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// [`Slug::new`] must keep only ASCII alphanumeric characters and
    /// lowercase them.
    #[test]
    fn normalized() {
        // (input, expected normalized form)
        let cases: &[(&str, &str)] = &[
            // Identity for already-normalized lowercase
            ("input", "input"),
            // Uppercase is lowercased
            ("INPUT", "input"),
            // Mixed case is lowercased
            ("InPuT", "input"),
            // Underscores removed
            ("in_put", "input"),
            // Dashes removed
            ("in-put", "input"),
            // Spaces removed
            ("in put", "input"),
            // Mixed punctuation removed
            ("in!p@u#t$", "input"),
            // Numbers retained
            ("in123put", "in123put"),
            // Mixed numbers and symbols
            ("i!n@1#2$3%p^u&t*", "in123put"),
            // Only symbols -> empty
            ("!@#$%^&*()", ""),
            // Only whitespace -> empty
            ("     ", ""),
            // Only underscores/dashes -> empty
            ("___---", ""),
            // Unicode letters removed
            ("áβç", ""),
            // Mixed ASCII and Unicode, keep ASCII
            ("aβc", "ac"),
            // Leading/trailing punctuation
            ("!!input!!", "input"),
            // Leading/trailing whitespace
            ("  input  ", "input"),
            // Mixed everything
            ("++In_PuT-123!@#", "input123"),
            // Empty string
            ("", ""),
            // Long string with mixed content
            (
                "++SectionTitle_2025! Rust-lang --_β_ä_ç_1234",
                "sectiontitle2025rustlang1234",
            ),
        ];

        for &(input, expected) in cases {
            assert_eq!(Slug::new(input).as_ref(), expected, "input: {:?}", input);
        }
    }

    /// The allocation-free comparisons must agree with comparing the
    /// normalized strings through the standard library.
    #[test]
    fn std_vs_custom() {
        use std::cmp::Ordering::{Equal, Greater, Less};

        // (left, right, expected ordering of left relative to right)
        let cases: &[(&str, &str, Ordering)] = &[
            // Equivalent spellings
            ("", "", Equal),
            ("a", "A", Equal),
            ("A", "a", Equal),
            ("abc", "ABC", Equal),
            ("abc", "a_b_c", Equal),
            ("abc", "a-b-c", Equal),
            ("abc", "a b c", Equal),
            ("abc", "a!b@c#", Equal),
            ("abc", "a_b-c!", Equal),
            ("sectiontitle", "SECTION_TITLE", Equal),
            ("sectiontitle", "sectiontitle", Equal),
            ("sectiontitle", "++SectionTitle", Equal),
            ("multiwordslug", "MULTI_WORD_SLUG", Equal),
            ("multiwordslug", "MultiWordSlug", Equal),
            ("multiwordslug", "multiwordslug", Equal),
            ("multiwordslug", "++multi-word-slug!", Equal),
            ("units", "Unit_S", Equal),
            ("resign", "re-sign", Equal),
            ("abc123", "a_b_c_1_2_3", Equal),
            ("abc123", "A B C 1 2 3", Equal),
            ("abc123", "abc123", Equal),
            ("abc123", "ABC123", Equal),
            ("abc", "a!@#$%^&*()_+b{}:\"|?><c", Equal),
            ("abc", "A!@#$%^&*()_+B{}:\"|?><C", Equal),
            ("abc", "a_b_c", Equal),
            ("abc", "A-B-C", Equal),
            ("abc", "a b c", Equal),
            ("abc", "A B C", Equal),
            ("abc", "a!b@c#", Equal),
            // Ordering by content and by length
            ("", "a", Less),
            ("a", "b", Less),
            ("a", "A", Equal),
            ("abc", "abd", Less),
            ("abc", "abc1", Less),
            ("abc1", "abc2", Less),
            ("abc", "abcd", Less),
            ("abcd", "abc", Greater),
            ("abc", "a_b_c_d", Less),
            ("abc", "a-b-c-d", Less),
            ("abc", "a b c d", Less),
            ("abc1", "abc", Greater),
            ("abc", "abc", Equal),
            ("abc", "abd", Less),
            ("abd", "abc", Greater),
            ("abc", "ab", Greater),
            ("ab", "abc", Less),
            // Non-ASCII letters are dropped before comparing
            ("abc", "aβc", Less),
            ("abc", "açc", Less),
            ("abc", "ábć", Less),
            ("abc", "äbć", Less),
            ("β", "", Equal),
            ("ç", "", Equal),
            ("á", "", Equal),
            ("ä", "", Equal),
            // Inputs without any relevant characters
            ("!!!", "", Equal),
            ("___", "", Equal),
            ("---", "", Equal),
            ("   ", "", Equal),
            // Digits
            ("123", "1_2_3", Equal),
            ("123", "1-2-3", Equal),
            ("123", "1 2 3", Equal),
            ("abc123", "a_b_c_1_2_3", Equal),
            ("abc123", "A B C 1 2 3", Equal),
            // Leading/trailing noise
            ("a", "a!", Equal),
            ("a", "!a", Equal),
            ("a", "a_", Equal),
            ("a", "_a", Equal),
            // Case is ignored before length decides
            ("A", "ab", Less),
            ("ab", "A", Greater),
        ];

        for &(left, right, expected) in cases {
            assert_eq_and_cmp(left, right, expected);
        }
    }

    /// Runs both the equality and the ordering cross-checks for one pair.
    fn assert_eq_and_cmp(a: &str, b: &str, ordering: Ordering) {
        let expected_eq = ordering == Ordering::Equal;

        assert_eq(a, b, expected_eq);
        assert_cmp(a, b, ordering);
    }

    /// Cross-checks equality three ways: standard comparison of the
    /// normalized strings, [`Slug::eq_as_slugs`] on the raw inputs, and
    /// [`Slug::eq_as_slugs`] on the normalized strings.
    fn assert_eq(a: &str, b: &str, expected_eq: bool) {
        // Normalize both inputs and borrow the normalized strings
        let (a_slug, b_slug) = (Slug::new(a), Slug::new(b));
        let a_slug_str: &str = a_slug.as_ref();
        let b_slug_str: &str = b_slug.as_ref();

        // Standard library on the normalized strings
        let std_eq = a_slug_str == b_slug_str;

        // Custom logic on raw inputs and on normalized strings
        let custom_eq_str = Slug::eq_as_slugs(a, b);
        let custom_eq_slug = Slug::eq_as_slugs(a_slug_str, b_slug_str);

        // All three must agree with the expectation
        assert_eq!(
            expected_eq, std_eq,
            "Failed eq check: '{}' vs '{}': expected {:?}, found {:?}",
            a_slug_str, b_slug_str, expected_eq, std_eq,
        );
        assert_eq!(
            std_eq, custom_eq_str,
            "Failed eq check: std eq: '{}' vs '{}' = {:?}; custom eq: '{}' vs '{}' = {:?}",
            a_slug_str, b_slug_str, std_eq, a, b, custom_eq_str,
        );
        assert_eq!(
            custom_eq_str, custom_eq_slug,
            "Failed eq check: custom eq (on originals): '{}' vs '{}' = {:?}; custom eq (on slugs): '{}' vs '{}' = {:?}",
            a, b, custom_eq_str, a_slug_str, b_slug_str, custom_eq_slug,
        );
    }

    /// Cross-checks ordering three ways: standard comparison of the
    /// normalized strings, [`Slug::cmp_as_slugs`] on the raw inputs, and
    /// [`Slug::cmp_as_slugs`] on the normalized strings.
    fn assert_cmp(a: &str, b: &str, expected_cmp: Ordering) {
        // Normalize both inputs and borrow the normalized strings
        let (a_slug, b_slug) = (Slug::new(a), Slug::new(b));
        let a_slug_str: &str = a_slug.as_ref();
        let b_slug_str: &str = b_slug.as_ref();

        // Standard library on the normalized strings
        let std_cmp = a_slug_str.cmp(b_slug_str);

        // Custom logic on raw inputs and on normalized strings
        let custom_cmp_str = Slug::cmp_as_slugs(a, b);
        let custom_cmp_slug = Slug::cmp_as_slugs(a_slug_str, b_slug_str);

        // All three must agree with the expectation
        assert_eq!(
            expected_cmp, std_cmp,
            "Failed cmp check: '{}' vs '{}': expected {:?}, found {:?}",
            a_slug_str, b_slug_str, expected_cmp, std_cmp,
        );
        assert_eq!(
            std_cmp, custom_cmp_str,
            "Failed cmp check: std cmp: '{}' vs '{}' = {:?}; custom cmp: '{}' vs '{}' = {:?}",
            a_slug_str, b_slug_str, std_cmp, a, b, custom_cmp_str,
        );
        assert_eq!(
            custom_cmp_str, custom_cmp_slug,
            "Failed cmp check: custom cmp (on originals): '{}' vs '{}' = {:?}; custom cmp (on slugs): '{}' vs '{}' = {:?}",
            a, b, custom_cmp_str, a_slug_str, b_slug_str, custom_cmp_slug,
        );
    }
}
487
#[cfg(test)]
mod group_by_value_tests {
    use super::*;
    use serde::de::{DeserializeSeed, IntoDeserializer, MapAccess};
    use serde_value::{DeserializerError, Value};
    use std::collections::BTreeMap;

    /// In-memory [`MapAccess`] that replays a fixed list of entries in order.
    struct TestMapAccess {
        /// Entries to hand out, in order.
        items: Vec<(String, Value)>,
        /// Index of the entry whose key/value is served next.
        pos: usize,
    }

    impl TestMapAccess {
        fn new(items: Vec<(String, Value)>) -> Self {
            Self { items, pos: 0 }
        }
    }

    impl<'de> MapAccess<'de> for TestMapAccess {
        type Error = DeserializerError;

        /// Serves the key of the current entry without advancing; `None` once
        /// all entries have been consumed.
        fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
        where
            K: DeserializeSeed<'de>,
        {
            match self.items.get(self.pos) {
                Some((key, _)) => seed.deserialize(key.clone().into_deserializer()).map(Some),
                None => Ok(None),
            }
        }

        /// Serves the value of the current entry and advances to the next
        /// entry.
        fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
        where
            V: DeserializeSeed<'de>,
        {
            let value = self.items[self.pos].1.clone();
            self.pos += 1;
            seed.deserialize(value)
        }
    }

    /// Builds an entry whose value is a single-entry nested map.
    fn map_entry(key: &str, nested_key: &str, nested_value: i32) -> (String, Value) {
        let mut nested = BTreeMap::new();
        nested.insert(
            Value::String(nested_key.to_owned()),
            Value::I32(nested_value),
        );

        (key.to_owned(), Value::Map(nested))
    }

    /// Builds an entry whose value is a plain integer (not a map).
    fn map_entry_i32(key: &str, value: i32) -> (String, Value) {
        (key.to_owned(), Value::I32(value))
    }

    /// Feeds the given entries through [`Slug::group_map`].
    fn group(items: Vec<(String, Value)>) -> Result<HashMap<Slug, Value>, DeserializerError> {
        Slug::group_map(TestMapAccess::new(items))
    }

    #[test]
    fn simple() {
        // Given: two of the three keys normalize to the same slug
        let items = vec![
            map_entry("foo", "a", 1),
            map_entry("bar", "b", 2),
            map_entry("__BAR", "c", 3),
        ];

        // When
        let grouped = group(items).unwrap();

        // Then
        assert_eq!(grouped.len(), 2);
        assert!(grouped.contains_key("foo"));
        assert!(grouped.contains_key("bar"));
    }

    #[test]
    fn collision() {
        // Given
        let items = vec![
            map_entry("A", "x", 1),
            map_entry("a___", "y", 2),
            map_entry("b", "z", 3),
        ];

        // When
        let mut grouped = group(items).unwrap();

        // Then
        let (merged_slug, merged_value) = grouped.remove_entry("a").unwrap();
        assert_eq!(merged_slug.original(), "a___"); // must be the longest original

        let map = match merged_value {
            Value::Map(map) => map,
            _ => panic!("Expected merged value to be a Map"),
        };
        assert_eq!(
            map.get(&Value::String("x".to_string())),
            Some(&Value::I32(1)),
        );
        assert_eq!(
            map.get(&Value::String("y".to_string())),
            Some(&Value::I32(2)),
        );
    }

    #[test]
    fn nested_non_map() {
        // Given: a single, non-colliding entry with a non-map value
        let items = vec![map_entry_i32("foo", 1)];

        // When
        let result = group(items).unwrap();

        // Then: non-map values are fine as long as nothing collides
        assert_eq!(result.get("foo"), Some(&Value::I32(1)));
    }

    #[test]
    fn merge_non_map() {
        // Given: colliding keys where one value is not a map
        let items = vec![map_entry("foo", "a", 1), map_entry_i32("__FOO", 2)];

        // When
        let result = group(items);

        // Then
        assert!(result.is_err());
    }

    #[test]
    fn empty() {
        // Given / When
        let grouped = group(Vec::new()).unwrap();

        // Then
        assert!(grouped.is_empty());
    }
}
634}