1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
//! Implementations of the [`FromIterator`] trait to make building [`Repr`]s more ergonomic

use core::iter::FromIterator;
use std::borrow::Cow;

use super::{
    InlineBuffer,
    Repr,
    MAX_SIZE,
};
use crate::CompactString;

impl FromIterator<char> for Repr {
    /// Builds a `Repr` from an iterator of `char`s.
    ///
    /// Characters are UTF-8 encoded into an `InlineBuffer` for as long as the running byte
    /// count stays within `MAX_SIZE`. As soon as a character does not fit, the bytes gathered
    /// so far, that character, and the rest of the iterator are moved into a `String` which is
    /// handed to `Repr::from_string`.
    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
        let mut chars = iter.into_iter();

        // Every `char` encodes to at least one byte, so a lower bound above `MAX_SIZE` means
        // the result cannot fit in the inline buffer; go straight to a `String`.
        if chars.size_hint().0 > MAX_SIZE {
            let heap: String = chars.collect();
            return Repr::from_string(heap, true);
        }

        let mut buffer = InlineBuffer::new_const("");
        let mut written = 0;

        while let Some(ch) = chars.next() {
            // byte offset just past this char, were it to be written
            let end = written + ch.len_utf8();

            if end <= MAX_SIZE {
                // still fits: encode into the unoccupied tail of the buffer
                ch.encode_utf8(&mut buffer.0[written..]);
                written = end;
                continue;
            }

            // This char overflows the inline buffer, so move everything to a `String`, sized
            // for what we already have plus the iterator's lower bound of remaining chars.
            let lower_bound = chars.size_hint().0;
            let mut spilled = String::with_capacity(end + lower_bound);

            // SAFETY: the first `written` bytes of `buffer` were produced exclusively by
            // `char::encode_utf8`, so they are valid UTF-8
            let prefix = unsafe { core::str::from_utf8_unchecked(&buffer.0[..written]) };
            spilled.push_str(prefix);
            spilled.push(ch);
            spilled.extend(chars);

            return Repr::from_string(spilled, true);
        }

        // SAFETY: only whole, UTF-8 encoded `char`s were written into the first `written`
        // bytes of the buffer
        unsafe { buffer.set_len(written) }

        Repr::from_inline(buffer)
    }
}

impl<'a> FromIterator<&'a char> for Repr {
    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
        iter.into_iter().copied().collect()
    }
}

/// Concatenates every string yielded by `iter` into a single `Repr`.
///
/// Pieces are copied into an `InlineBuffer` while the running byte count stays within
/// `MAX_SIZE`. The first piece that would exceed that limit causes the bytes gathered so far,
/// that piece, and all remaining pieces to be collected into a `String`, which is then passed
/// to `Repr::from_string`.
fn from_as_ref_str_iterator<S, I>(mut iter: I) -> Repr
where
    S: AsRef<str>,
    I: Iterator<Item = S>,
    String: core::iter::Extend<S>,
    String: FromIterator<S>,
{
    // Note: unlike the `char` implementation, the iterator's lower bound is not consulted up
    // front. It counts items rather than bytes, and every item may be an empty string, so
    // trusting it could force a heap allocation that turns out to be unnecessary.

    let mut buffer = InlineBuffer::new_const("");
    let mut written = 0;

    while let Some(item) = iter.next() {
        let piece: &str = item.as_ref();
        // byte offset just past this piece, were it to be written
        let end = written + piece.len();

        if end <= MAX_SIZE {
            // still fits: copy the bytes into the unoccupied part of the buffer
            buffer.0[written..end].copy_from_slice(piece.as_bytes());
            written = end;
            continue;
        }

        // This piece overflows the inline buffer, so move everything to a `String`, sized for
        // what we already have plus the iterator's lower bound of remaining items.
        let lower_bound = iter.size_hint().0;
        let mut spilled = String::with_capacity(end + lower_bound);

        // SAFETY: the first `written` bytes of `buffer` were copied from whole `&str`s, so
        // they are valid UTF-8
        let prefix = unsafe { core::str::from_utf8_unchecked(&buffer.0[..written]) };
        spilled.push_str(prefix);
        spilled.push_str(piece);
        spilled.extend(iter);

        return Repr::from_string(spilled, true);
    }

    // SAFETY: only complete `str`s were copied into the first `written` bytes of the buffer,
    // so that range is valid UTF-8
    unsafe { buffer.set_len(written) }

    Repr::from_inline(buffer)
}

impl<'a> FromIterator<&'a str> for Repr {
    /// Concatenates the string slices yielded by `iter` into one `Repr`.
    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
        let slices = iter.into_iter();
        from_as_ref_str_iterator(slices)
    }
}

impl FromIterator<Box<str>> for Repr {
    /// Concatenates the boxed strings yielded by `iter` into one `Repr`.
    fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
        let boxed = iter.into_iter();
        from_as_ref_str_iterator(boxed)
    }
}

impl FromIterator<String> for Repr {
    /// Concatenates the owned `String`s yielded by `iter` into one `Repr`.
    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
        let strings = iter.into_iter();
        from_as_ref_str_iterator(strings)
    }
}

impl FromIterator<CompactString> for Repr {
    /// Concatenates the `CompactString`s yielded by `iter` into one `Repr`.
    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
        let compact_strings = iter.into_iter();
        from_as_ref_str_iterator(compact_strings)
    }
}

impl<'a> FromIterator<Cow<'a, str>> for Repr {
    /// Concatenates the borrowed-or-owned strings yielded by `iter` into one `Repr`.
    fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
        let cows = iter.into_iter();
        from_as_ref_str_iterator(cows)
    }
}

#[cfg(test)]
mod tests {
    use super::Repr;

    /// Collecting a few owned `char`s should produce a `Repr` that is not heap allocated.
    #[test]
    fn short_char_iter() {
        let chars = ['a', 'b', 'c'];
        // `.copied()` yields owned `char`s, so this goes through `FromIterator<char>` directly
        // rather than through the `&char` impl covered by `short_char_ref_iter`
        let repr: Repr = chars.iter().copied().collect();

        assert_eq!(repr.as_str(), "abc");
        assert!(!repr.is_heap_allocated());
    }

    /// Collecting a few `&char`s should produce a `Repr` that is not heap allocated.
    #[test]
    fn short_char_ref_iter() {
        let chars = ['a', 'b', 'c'];
        let repr: Repr = chars.iter().collect();

        assert_eq!(repr.as_str(), "abc");
        assert!(!repr.is_heap_allocated());
    }

    /// Six 4-byte `char`s (24 bytes in total) are expected to stay inline; ignored on 32-bit
    /// targets, presumably because the inline capacity is smaller there.
    #[test]
    #[cfg_attr(target_pointer_width = "32", ignore)]
    fn packed_char_iter() {
        let chars = [
            '\u{92f01}',
            '\u{81515}',
            '\u{81515}',
            '\u{81515}',
            '\u{81515}',
            '\u{41515}',
        ];

        let repr: Repr = chars.iter().collect();
        let s: String = chars.iter().collect();

        assert_eq!(repr.as_str(), s.as_str());
        assert!(!repr.is_heap_allocated());
    }

    /// A long run of `char`s should end up heap allocated.
    #[test]
    fn long_char_iter() {
        let long = "This is supposed to be a really long string";
        let repr: Repr = long.chars().collect();

        assert_eq!(repr.as_str(), "This is supposed to be a really long string");
        assert!(repr.is_heap_allocated());
    }

    /// An iterator whose size hint already reports many `char`s should end up heap allocated
    /// with its contents intact.
    #[test]
    fn long_char_iter_exact_size_hint() {
        // `repeat(..).take(50)` reports a lower bound of exactly 50 items
        let repr: Repr = core::iter::repeat('a').take(50).collect();

        assert_eq!(repr.as_str(), "a".repeat(50).as_str());
        assert!(repr.is_heap_allocated());
    }

    /// Concatenating a couple of short strings should stay inline.
    #[test]
    fn short_string_iter() {
        let strings = vec!["hello", "world"];
        let repr: Repr = strings.into_iter().collect();

        assert_eq!(repr.as_str(), "helloworld");
        assert!(!repr.is_heap_allocated());
    }

    /// Many short strings whose combined length is large should end up heap allocated.
    #[test]
    fn long_short_string_iter() {
        let strings = vec![
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16",
            "17", "18", "19", "20",
        ];
        let repr: Repr = strings.into_iter().collect();

        assert_eq!(repr.as_str(), "1234567891011121314151617181920");
        assert!(repr.is_heap_allocated());
    }
}