ellidri_unicase/lib.rs
1//! Wrapper around str that makes comparisons case-insensitive.
2//!
//! Intended for use within a `HashMap`. Currently used by ellidri's `State`. It doesn't support
//! Unicode case-folding for now.
5//!
6//! The wrapper is named `UniCase`. It implements traits so that `&UniCase<str>` behaves like
7//! `&str`, and `UniCase<String>` behaves like `String`, except for the comparisons of course,
8//! which are case-insensitive.
9//!
10//! "Case-insensitivity" is defined by the `CaseMapping` trait. This trait defines how characters
11//! and bytes should match. Currently, the following case mappings are available:
12//!
13//! - `Ascii` (default): matches ascii lower case letters with their ascii upper case counterparts,
14//! - `Rfc1459`: same as `Ascii`, but also matches `{}|^` with `[]\~`.
15//! - `Rfc1459Strict`: same as `Ascii`, but also matches `{}|` with `[]\`.
16//!
17//! Currently, `rfc7613` is not implemented.
18//!
19//! # Usage
20//!
21//! ```rust
22//! use ellidri_unicase::{u, UniCase};
23//! use std::collections::HashSet;
24//!
25//! let mut channels = HashSet::new();
26//! channels.insert(UniCase::new("#Games".to_owned()));
27//!
28//! assert!(channels.contains(u("#gameS")));
29//! assert!(!channels.contains(u("#Gaming")));
30//!
31//! assert_eq!(u("hello!"), u("HELLO!"));
32//! ```
33
34#![warn(clippy::all, rust_2018_idioms)]
35#![allow(clippy::filter_map, clippy::find_map, clippy::shadow_unrelated, clippy::use_self)]
36
37use std::fmt;
38use std::borrow::Borrow;
39use std::hash::{Hash, Hasher};
40use std::marker::PhantomData;
41
/// Describes which bytes are considered equal when two strings are compared.
pub trait CaseMapping {
    /// Returns the canonical representative of the given byte.
    ///
    /// Two bytes match under a case mapping exactly when they share the same canonical byte:
    /// an implementation must return the same value for all bytes that should match, and
    /// different values for bytes that should not match. In practice, implementations convert
    /// bytes to their lowercase equivalent.
    ///
    /// # Example
    ///
    /// With the Ascii case mapping,
    ///
    /// ```rust
    /// # use ellidri_unicase::{Ascii, CaseMapping};
    /// assert!(Ascii::canonical_byte(b'a') == Ascii::canonical_byte(b'A'));
    /// assert!(Ascii::canonical_byte(b'a') != Ascii::canonical_byte(b'B'));
    /// ```
    fn canonical_byte(b: u8) -> u8;
}
64
65/// ASCII case mapping.
66#[derive(Debug)]
67pub struct Ascii;
68
69impl CaseMapping for Ascii {
70 fn canonical_byte(b: u8) -> u8 {
71 b.to_ascii_lowercase()
72 }
73}
74
75/// rfc1459-strict case mapping.
76pub struct Rfc1459Strict;
77
78impl CaseMapping for Rfc1459Strict {
79 fn canonical_byte(b: u8) -> u8 {
80 match b {
81 b'[' => b'{',
82 b']' => b'}',
83 b'\\' => b'\\',
84 b => Ascii::canonical_byte(b),
85 }
86 }
87}
88
89/// rfc1459 case mapping.
90pub struct Rfc1459;
91
92impl CaseMapping for Rfc1459 {
93 fn canonical_byte(b: u8) -> u8 {
94 match b {
95 b'~' => b'^',
96 b => Rfc1459Strict::canonical_byte(b),
97 }
98 }
99}
100
101/// Case-insensitive wrapper around strings.
102///
103/// See the crate-level documentation for more information and usage examples.
104#[repr(transparent)]
105pub struct UniCase<S: ?Sized, C: CaseMapping = Ascii>(PhantomData<C>, S);
106
107impl<S, C> UniCase<S, C>
108 where C: CaseMapping,
109{
110 /// Wraps the given value into `UniCase`, and "make it" case-insensitive.
111 ///
112 /// Use this to make `UniCase<String>` for example. If you need to wrap a `&str`, you might
113 /// want to use `u` instead, or `&UniCase<str>::from`.
114 pub fn new(s: S) -> Self {
115 UniCase(PhantomData, s)
116 }
117
118 /// Consume the case-insensitive wrapper and returns the underlying value.
119 pub fn into_inner(self) -> S {
120 self.1
121 }
122}
123
124impl<S, C> UniCase<S, C>
125 where S: ?Sized,
126 C: CaseMapping,
127{
128 /// Returns a reference to the underlying value.
129 pub fn get(&self) -> &S {
130 &self.1
131 }
132}
133
134impl<'a, C> From<&'a str> for &'a UniCase<str, C>
135 where C: CaseMapping,
136{
137 fn from(s: &'a str) -> &'a UniCase<str, C> {
138 // Because of #[repr(transparent)],
139 // Unicase<str> and str have the same memory representation
140 // So the cast `as *const Unicase<str>` must work.
141 unsafe { &*(s as *const str as *const UniCase<str, C>) }
142 }
143}
144
145/// Converts a `&str` into a `&UniCase<str, Ascii>`.
146///
147/// Shorthand for `<&Unicase<str, Ascii>>::from`.
148pub fn u(s: &str) -> &UniCase<str> {
149 s.into()
150}
151
152impl<S, C> AsRef<UniCase<str, C>> for UniCase<S, C>
153 where S: AsRef<str> + ?Sized,
154 C: CaseMapping,
155{
156 fn as_ref(&self) -> &UniCase<str, C> {
157 self.1.as_ref().into()
158 }
159}
160
161impl<S, C> Borrow<UniCase<str, C>> for UniCase<S, C>
162 where S: Borrow<str>,
163 C: CaseMapping,
164{
165 fn borrow(&self) -> &UniCase<str, C> {
166 self.1.borrow().into()
167 }
168}
169
170impl<S, C> Hash for UniCase<S, C>
171 where S: AsRef<str> + ?Sized,
172 C: CaseMapping,
173{
174 fn hash<H: Hasher>(&self, hasher: &mut H) {
175 let bytes = self.1.as_ref().as_bytes();
176 for &byte in bytes {
177 hasher.write_u8(C::canonical_byte(byte));
178 }
179 }
180}
181
182impl<S1, S2, C> PartialEq<UniCase<S2, C>> for UniCase<S1, C>
183 where S1: AsRef<str> + ?Sized,
184 S2: AsRef<str> + ?Sized,
185 C: CaseMapping,
186{
187 fn eq(&self, other: &UniCase<S2, C>) -> bool {
188 let me = self.1.as_ref().as_bytes();
189 let you = other.1.as_ref().as_bytes();
190 me.len() == you.len() && me.iter().zip(you).all(|(&a, &b)| {
191 C::canonical_byte(a) == C::canonical_byte(b)
192 })
193 }
194}
195
196impl<S, C> Eq for UniCase<S, C>
197 where S: AsRef<str> + ?Sized,
198 C: CaseMapping,
199{}
200
201impl<S> fmt::Debug for UniCase<S, Ascii>
202 where S: fmt::Debug + ?Sized,
203{
204 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205 write!(f, "UniCase<Ascii>({:?})", &self.1)
206 }
207}
208
209impl<S> fmt::Debug for UniCase<S, Rfc1459>
210 where S: fmt::Debug + ?Sized,
211{
212 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213 write!(f, "UniCase<Rfc1459>({:?})", &self.1)
214 }
215}
216
217impl<S> fmt::Debug for UniCase<S, Rfc1459Strict>
218 where S: fmt::Debug + ?Sized,
219{
220 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221 write!(f, "UniCase<Rfc1459Strict>({:?})", &self.1)
222 }
223}