embed_str/
lib.rs

1//! Short string embedding for std `str`
2
/// Replacement for `Box<str>` that stores short strings inline.
///
/// The value is two machine words wide, the same size as a `Box<str>`.
/// A string of at most `2 * size_of::<usize>() - 1` bytes is copied into
/// those words (next to a one-byte tag/length header) instead of being kept
/// behind a heap pointer; longer strings are held as the words of a
/// `Box<str>`.
///
/// `#[repr(transparent)]` pins the layout to that of the inner array, because
/// the unsafe code in this file reinterprets the value through pointer casts
/// and transmutes.
#[repr(transparent)]
pub struct EmbeddingStr([usize; 2]);

/// Size in bytes of an [`EmbeddingStr`]; the inline capacity is one byte less.
const STR_INNER_SIZE: usize = std::mem::size_of::<EmbeddingStr>();
10
/// Storage strategy currently used by an `EmbeddingStr`.
///
/// The enum carries no data, so it is cheap to copy, compare and hash.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EmbeddingStrMode {
    /// The text lives on the heap and is owned through a `Box<str>`.
    Boxed,
    /// The text bytes are stored inside the value itself.
    Embedded,
}
16
17impl EmbeddingStr {
18    pub fn mode(&self) -> EmbeddingStrMode {
19        // SAFETY: std::mem::align_of::<&str>() > 1
20        if (self.0[0] & 1) == 0 {
21            EmbeddingStrMode::Boxed
22        } else {
23            EmbeddingStrMode::Embedded
24        }
25    }
26
27    pub fn as_str(&self) -> &str {
28        match self.mode() {
29            EmbeddingStrMode::Boxed => unsafe { std::mem::transmute(self.0) },
30            EmbeddingStrMode::Embedded => {
31                let embedded = unsafe { &*(self as *const Self as *const EmbeddedStr) };
32                embedded.as_str()
33            }
34        }
35    }
36}
37
38impl Drop for EmbeddingStr {
39    fn drop(&mut self) {
40        match self.mode() {
41            EmbeddingStrMode::Boxed => {
42                let _boxed: Box<str> = unsafe { std::mem::transmute(self.0) };
43            }
44            EmbeddingStrMode::Embedded => {
45                // nothing to do
46            }
47        }
48    }
49}
50
51impl std::fmt::Display for EmbeddingStr {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        std::fmt::Display::fmt(self.as_str(), f)
54    }
55}
56
57impl std::fmt::Debug for EmbeddingStr {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        write!(f, "{:?}({:?})", self.mode(), self.as_str())
60    }
61}
62
63impl From<String> for EmbeddingStr {
64    #[inline]
65    fn from(s: String) -> Self {
66        const MAX_SIZE: usize = STR_INNER_SIZE - 1;
67        // when size=0, it already has embedded form
68        Self(if (1..=MAX_SIZE).contains(&s.len()) {
69            let embedded = EmbeddedStr::from(s.as_str());
70            unsafe { std::mem::transmute(embedded) }
71        } else {
72            let boxed = s.into_boxed_str();
73            unsafe { std::mem::transmute(boxed) }
74        })
75    }
76}
77
78impl From<&'static str> for EmbeddingStr {
79    #[inline]
80    fn from(s: &str) -> Self {
81        const MAX_SIZE: usize = STR_INNER_SIZE - 1;
82        // when size=0, it already has embedded form
83        Self(if (1..=MAX_SIZE).contains(&s.len()) {
84            let embedded = EmbeddedStr::from(s);
85            unsafe { std::mem::transmute(embedded) }
86        } else {
87            let boxed = s.to_owned().into_boxed_str();
88            unsafe { std::mem::transmute(boxed) }
89        })
90    }
91}
92
93struct EmbeddedStr([u8; STR_INNER_SIZE]);
94
95impl EmbeddedStr {
96    fn as_str(&self) -> &str {
97        let ptr;
98        let encoded_len;
99        #[cfg(target_endian = "little")]
100        unsafe {
101            ptr = self.0.as_ptr().offset(1);
102            encoded_len = (*self.0.as_ptr()) as usize;
103        }
104        #[cfg(target_endian = "big")]
105        unsafe {
106            ptr = self.0.as_ptr();
107            encoded_len = *self.0.as_ptr().offset(STR_INNER_SIZE - 1);
108        }
109        let pair = [ptr as usize, encoded_len as usize >> 1];
110        unsafe { std::mem::transmute(pair) }
111    }
112}
113
114impl From<&str> for EmbeddedStr {
115    #[inline]
116    fn from(s: &str) -> Self {
117        debug_assert!(s.len() < STR_INNER_SIZE);
118        let mut new = std::mem::MaybeUninit::<Self>::uninit();
119        let mut_ptr = new.as_mut_ptr() as *mut u8;
120        let encoded_len = (s.len() << 1) as u8 + 1;
121        unsafe {
122            #[cfg(target_endian = "little")]
123            {
124                std::ptr::copy_nonoverlapping(s.as_ptr(), mut_ptr.offset(1), s.len());
125                mut_ptr.write(encoded_len);
126            }
127            #[cfg(target_endian = "big")]
128            {
129                std::ptr::copy_nonoverlapping(s.as_ptr(), mut_ptr, s.len());
130                mut_ptr.offset(MAX_SIZE).write(encoded_len);
131            }
132            new.assume_init()
133        }
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks strings across the inline/heap boundary and checks both the
    /// reported mode and the text read back.
    #[test]
    fn test_lifecycle() {
        // The lengths below assume two 8-byte words, i.e. a 64-bit target.
        assert_eq!(STR_INNER_SIZE, 16);

        let cases: [(&'static str, EmbeddingStrMode); 6] = [
            ("", EmbeddingStrMode::Embedded),
            ("a", EmbeddingStrMode::Embedded),
            ("abcdxyz01", EmbeddingStrMode::Embedded),
            ("123456789012345", EmbeddingStrMode::Embedded),
            ("1234567890123456", EmbeddingStrMode::Boxed),
            ("something longer than 15 byets", EmbeddingStrMode::Boxed),
        ];

        for (text, expected_mode) in cases.iter() {
            let value = EmbeddingStr::from(*text);
            assert_eq!(value.mode(), *expected_mode);
            assert_eq!(value.as_str(), *text);
        }
    }

    /// Checks the `Display` and `Debug` renderings for one embedded and one
    /// boxed value.
    #[test]
    fn test_format() {
        let short = EmbeddingStr::from("a");
        assert_eq!(format!("{}", short), "a");
        assert_eq!(format!("{:?}", short), "Embedded(\"a\")");

        let long = EmbeddingStr::from("1234567890123456");
        assert_eq!(format!("{}", long), "1234567890123456");
        assert_eq!(format!("{:?}", long), "Boxed(\"1234567890123456\")");
    }
}