ellidri_unicase/
lib.rs

1//! Wrapper around str that makes comparisons case-insensitive.
2//!
//! Intended for use within a `HashMap`.  Currently used by ellidri's `State`.  It doesn't support
//! Unicode case-folding for now.
5//!
6//! The wrapper is named `UniCase`.  It implements traits so that `&UniCase<str>` behaves like
7//! `&str`, and `UniCase<String>` behaves like `String`, except for the comparisons of course,
8//! which are case-insensitive.
9//!
10//! "Case-insensitivity" is defined by the `CaseMapping` trait.  This trait defines how characters
11//! and bytes should match.  Currently, the following case mappings are available:
12//!
13//! - `Ascii` (default): matches ascii lower case letters with their ascii upper case counterparts,
14//! - `Rfc1459`: same as `Ascii`, but also matches `{}|^` with `[]\~`.
15//! - `Rfc1459Strict`: same as `Ascii`, but also matches `{}|` with `[]\`.
16//!
17//! Currently, `rfc7613` is not implemented.
18//!
19//! # Usage
20//!
21//! ```rust
22//! use ellidri_unicase::{u, UniCase};
23//! use std::collections::HashSet;
24//!
25//! let mut channels = HashSet::new();
26//! channels.insert(UniCase::new("#Games".to_owned()));
27//!
28//! assert!(channels.contains(u("#gameS")));
29//! assert!(!channels.contains(u("#Gaming")));
30//!
31//! assert_eq!(u("hello!"), u("HELLO!"));
32//! ```
33
34#![warn(clippy::all, rust_2018_idioms)]
35#![allow(clippy::filter_map, clippy::find_map, clippy::shadow_unrelated, clippy::use_self)]
36
37use std::fmt;
38use std::borrow::Borrow;
39use std::hash::{Hash, Hasher};
40use std::marker::PhantomData;
41
/// Describes which bytes are considered equal when strings are compared case-insensitively.
pub trait CaseMapping {
    /// Maps `b` to the representative byte of the group of bytes it matches.
    ///
    /// Two bytes match under a case mapping exactly when this function returns the same value
    /// for both of them; bytes that do not match must yield different values.  Which byte is
    /// chosen as the representative is up to the implementation.  The mappings defined in this
    /// file build on ASCII lowercasing.
    ///
    /// # Example
    ///
    /// With the Ascii case mapping,
    ///
    /// ```rust
    /// # use ellidri_unicase::{Ascii, CaseMapping};
    /// assert!(Ascii::canonical_byte(b'a') == Ascii::canonical_byte(b'A'));
    /// assert!(Ascii::canonical_byte(b'a') != Ascii::canonical_byte(b'B'));
    /// ```
    fn canonical_byte(b: u8) -> u8;
}
64
/// Case mapping that only folds ASCII letters: `A`-`Z` match `a`-`z`, nothing else changes.
///
/// This is the default mapping of `UniCase`.
#[derive(Debug)]
pub struct Ascii;
68
69impl CaseMapping for Ascii {
70    fn canonical_byte(b: u8) -> u8 {
71        b.to_ascii_lowercase()
72    }
73}
74
/// rfc1459-strict case mapping.
///
/// Same as `Ascii`, but additionally meant to match `{}|` with `[]\`.
#[derive(Debug)]
pub struct Rfc1459Strict;
77
78impl CaseMapping for Rfc1459Strict {
79    fn canonical_byte(b: u8) -> u8 {
80        match b {
81            b'[' => b'{',
82            b']' => b'}',
83            b'\\' => b'\\',
84            b => Ascii::canonical_byte(b),
85        }
86    }
87}
88
/// rfc1459 case mapping.
///
/// Same as `Rfc1459Strict`, but also matches `^` with `~`.
#[derive(Debug)]
pub struct Rfc1459;
91
92impl CaseMapping for Rfc1459 {
93    fn canonical_byte(b: u8) -> u8 {
94        match b {
95            b'~' => b'^',
96            b => Rfc1459Strict::canonical_byte(b),
97        }
98    }
99}
100
101/// Case-insensitive wrapper around strings.
102///
103/// See the crate-level documentation for more information and usage examples.
104#[repr(transparent)]
105pub struct UniCase<S: ?Sized, C: CaseMapping = Ascii>(PhantomData<C>, S);
106
107impl<S, C> UniCase<S, C>
108    where C: CaseMapping,
109{
110    /// Wraps the given value into `UniCase`, and "make it" case-insensitive.
111    ///
112    /// Use this to make `UniCase<String>` for example.  If you need to wrap a `&str`, you might
113    /// want to use `u` instead, or `&UniCase<str>::from`.
114    pub fn new(s: S) -> Self {
115        UniCase(PhantomData, s)
116    }
117
118    /// Consume the case-insensitive wrapper and returns the underlying value.
119    pub fn into_inner(self) -> S {
120        self.1
121    }
122}
123
124impl<S, C> UniCase<S, C>
125    where S: ?Sized,
126          C: CaseMapping,
127{
128    /// Returns a reference to the underlying value.
129    pub fn get(&self) -> &S {
130        &self.1
131    }
132}
133
134impl<'a, C> From<&'a str> for &'a UniCase<str, C>
135    where C: CaseMapping,
136{
137    fn from(s: &'a str) -> &'a UniCase<str, C> {
138        // Because of #[repr(transparent)],
139        // Unicase<str> and str have the same memory representation
140        // So the cast `as *const Unicase<str>` must work.
141        unsafe { &*(s as *const str as *const UniCase<str, C>) }
142    }
143}
144
145/// Converts a `&str` into a `&UniCase<str, Ascii>`.
146///
147/// Shorthand for `<&Unicase<str, Ascii>>::from`.
148pub fn u(s: &str) -> &UniCase<str> {
149    s.into()
150}
151
152impl<S, C> AsRef<UniCase<str, C>> for UniCase<S, C>
153    where S: AsRef<str> + ?Sized,
154          C: CaseMapping,
155{
156    fn as_ref(&self) -> &UniCase<str, C> {
157        self.1.as_ref().into()
158    }
159}
160
161impl<S, C> Borrow<UniCase<str, C>> for UniCase<S, C>
162    where S: Borrow<str>,
163          C: CaseMapping,
164{
165    fn borrow(&self) -> &UniCase<str, C> {
166        self.1.borrow().into()
167    }
168}
169
170impl<S, C> Hash for UniCase<S, C>
171    where S: AsRef<str> + ?Sized,
172          C: CaseMapping,
173{
174    fn hash<H: Hasher>(&self, hasher: &mut H) {
175        let bytes = self.1.as_ref().as_bytes();
176        for &byte in bytes {
177            hasher.write_u8(C::canonical_byte(byte));
178        }
179    }
180}
181
182impl<S1, S2, C> PartialEq<UniCase<S2, C>> for UniCase<S1, C>
183    where S1: AsRef<str> + ?Sized,
184          S2: AsRef<str> + ?Sized,
185          C: CaseMapping,
186{
187    fn eq(&self, other: &UniCase<S2, C>) -> bool {
188        let me = self.1.as_ref().as_bytes();
189        let you = other.1.as_ref().as_bytes();
190        me.len() == you.len() && me.iter().zip(you).all(|(&a, &b)| {
191            C::canonical_byte(a) == C::canonical_byte(b)
192        })
193    }
194}
195
196impl<S, C> Eq for UniCase<S, C>
197    where S: AsRef<str> + ?Sized,
198          C: CaseMapping,
199{}
200
201impl<S> fmt::Debug for UniCase<S, Ascii>
202    where S: fmt::Debug + ?Sized,
203{
204    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205        write!(f, "UniCase<Ascii>({:?})", &self.1)
206    }
207}
208
209impl<S> fmt::Debug for UniCase<S, Rfc1459>
210    where S: fmt::Debug + ?Sized,
211{
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        write!(f, "UniCase<Rfc1459>({:?})", &self.1)
214    }
215}
216
217impl<S> fmt::Debug for UniCase<S, Rfc1459Strict>
218    where S: fmt::Debug + ?Sized,
219{
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        write!(f, "UniCase<Rfc1459Strict>({:?})", &self.1)
222    }
223}