temper_core/
string.rs

1use super::{Error, List, ListedTrait, Result, ToArcString};
2use std::{cell::RefCell, sync::Arc};
3
/// Uniform borrowed view of string-like values as `&str`.
///
/// The helpers in this file take `impl AsStr`, so one signature can accept
/// string slices, owned strings and shared (`Arc`) strings alike.
pub trait AsStr {
    /// Borrows the receiver's text as a string slice.
    fn as_str(&self) -> &str;
}
7
8impl AsStr for &str {
9    fn as_str(&self) -> &str {
10        self
11    }
12}
13
14impl AsStr for &&str {
15    fn as_str(&self) -> &str {
16        self
17    }
18}
19
20impl AsStr for String {
21    fn as_str(&self) -> &str {
22        &self
23    }
24}
25
26impl AsStr for Arc<String> {
27    fn as_str(&self) -> &str {
28        &self
29    }
30}
31
32impl AsStr for &Arc<String> {
33    fn as_str(&self) -> &str {
34        &self
35    }
36}
37
38pub mod builder {
39    use crate::{string::AsStr, Error, Result};
40    use std::{sync::Arc, sync::RwLock};
41
42    pub fn append(builder: &Arc<RwLock<String>>, text: impl AsStr) {
43        let mut builder = builder.write().unwrap();
44        let text = text.as_str();
45        builder.push_str(text);
46    }
47
48    pub fn append_between(
49        builder: &Arc<RwLock<String>>,
50        text: impl AsStr,
51        begin: usize,
52        end: usize,
53    ) {
54        let text = text.as_str();
55        append(builder, &text[begin..end]);
56    }
57
58    pub fn append_code_point(builder: &Arc<RwLock<String>>, code: i32) -> Result<()> {
59        let mut builder = builder.write().unwrap();
60        builder.push(char::from_u32(code as u32).ok_or(Error::new())?);
61        Ok(())
62    }
63
64    pub fn to_string(builder: &Arc<RwLock<String>>) -> Arc<String> {
65        let builder = builder.read().unwrap();
66        Arc::new(builder.clone())
67    }
68}
69
/// Converts the unit "no index" value into an empty `Option<usize>`.
///
/// Trivial, but handy to have as a named function.
pub fn cast_none_as_index_option(_none: ()) -> Option<usize> {
    Option::<usize>::None
}
74
/// Identity conversion on an optional index.
///
/// Trivial, but handy to have as a named function.
pub fn cast_as_index(option: Option<usize>) -> Option<usize> {
    let index = option;
    index
}
79
/// Inverts an optional index: `None` becomes `Some(())` and any `Some(_)`
/// becomes `None`.
pub fn cast_as_no_index(option: Option<usize>) -> Option<()> {
    option.is_none().then_some(())
}
87
88fn char_boundary_ceil(text: impl AsStr, mut index: usize) -> usize {
89    let text = text.as_str();
90    // Nightly: https://doc.rust-lang.org/std/string/struct.String.html#method.ceil_char_boundary
91    if index > text.len() {
92        return text.len();
93    }
94    while index < text.len() && !text.is_char_boundary(index) {
95        index += 1;
96    }
97    index
98}
99
100fn char_boundary_floor(text: impl AsStr, mut index: usize) -> usize {
101    let text = text.as_str();
102    // Nightly: https://doc.rust-lang.org/std/string/struct.String.html#method.floor_char_boundary
103    while index > 0 && !text.is_char_boundary(index) {
104        index -= 1;
105    }
106    index
107}
108
109pub fn count_between(text: impl AsStr, begin: usize, end: usize) -> i32 {
110    let text = text.as_str();
111    let begin = char_boundary_ceil(text, begin);
112    let end = char_boundary_floor(text, end);
113    if begin >= end {
114        return 0;
115    }
116    text[begin..end].chars().count().try_into().unwrap()
117}
118
119pub fn for_each(text: impl AsStr, action: &dyn Fn(i32)) {
120    let text = text.as_str();
121    for code in text.chars() {
122        // And valid unicode always fits in i32.
123        action(code as i32);
124    }
125}
126
127// But functions here still produce Arc<String> for convenience.
128pub fn from_code_point(code: i32) -> Result<Arc<String>> {
129    Ok(char::from_u32(code as u32)
130        .ok_or(Error::new())?
131        .to_arc_string())
132}
133
134pub fn from_code_points(codes: &dyn ListedTrait<i32>) -> Result<Arc<String>> {
135    let result: RefCell<Option<String>> = RefCell::new(None);
136    codes.with_vec(&|values: &Vec<i32>| {
137        result.replace(
138            values
139                .iter()
140                .map(|&code| std::char::from_u32(code as u32))
141                .collect(),
142        );
143    });
144    Ok(Arc::new(result.into_inner().ok_or(Error::new())?))
145}
146
147pub fn get(text: impl AsStr, index: usize) -> i32 {
148    let text = text.as_str();
149    if index >= text.len() {
150        panic!();
151    }
152    let index = char_boundary_floor(text, index);
153    text.get(index..).unwrap().chars().next().unwrap() as i32
154}
155
156pub fn has_at_least(text: impl AsStr, begin: usize, end: usize, min_count: i32) -> bool {
157    let text = text.as_str();
158    let begin = char_boundary_ceil(text, begin);
159    let end = char_boundary_floor(text, end);
160    if begin >= end {
161        return min_count == 0;
162    }
163    let mut count = 0;
164    for _ in text[begin..end].chars() {
165        count += 1;
166        if count >= min_count {
167            return true;
168        }
169    }
170    false
171}
172
173pub fn has_index(text: impl AsStr, index: usize) -> bool {
174    let text = text.as_str();
175    index < text.len()
176}
177
178pub fn index_of(text: impl AsStr, target: impl AsStr, start: Option<usize>) -> Option<usize> {
179    let text = text.as_str();
180    let target = target.as_str();
181    let start = start.unwrap_or(0);
182    text[start..].find(target).map(|i| i + start)
183}
184
185pub fn next(text: impl AsStr, index: usize) -> usize {
186    let text = text.as_str();
187    // TODO Worry about max usize value?
188    char_boundary_ceil(text, index + 1)
189}
190
191pub fn prev(text: impl AsStr, index: usize) -> usize {
192    let text = text.as_str();
193    if index == 0 {
194        return 0;
195    }
196    char_boundary_floor(text, index - 1)
197}
198
199pub fn step(text: impl AsStr, index: usize, by: i32) -> usize {
200    let text = text.as_str();
201    let mut new_index = index;
202    if by >= 0 {
203        for _ in 0..by {
204            let old_index = new_index;
205            new_index = next(text, new_index);
206            if new_index == old_index {
207                break;
208            }
209        }
210    } else {
211        for _ in 0..-by {
212            let old_index = new_index;
213            new_index = prev(text, new_index);
214            if new_index == old_index {
215                break;
216            }
217        }
218    }
219    new_index
220}
221
222// TODO Should this one take Arc<String> in case of full string slice?
223pub fn slice(text: impl AsStr, begin: usize, end: usize) -> Arc<String> {
224    let text = text.as_str();
225    let begin = char_boundary_ceil(text, begin);
226    let end = char_boundary_floor(text, end);
227    if begin >= end {
228        return "".to_arc_string();
229    }
230    text[begin..end].to_arc_string()
231}
232
233pub fn split(text: impl AsStr, sep: impl AsStr) -> List<Arc<String>> {
234    let text = text.as_str();
235    let sep = sep.as_str();
236    let parts = if sep.is_empty() {
237        text.chars().map(|x| Arc::new(x.to_string())).collect()
238    } else {
239        text.split(sep).map(|x| Arc::new(x.to_string())).collect()
240    };
241    Arc::new(parts)
242}
243
244pub fn to_float64(text: impl AsStr) -> Result<f64> {
245    let text = text.as_str();
246    // See: https://doc.rust-lang.org/std/primitive.f64.html#method.from_str
247    // We're more flexible in trimming than Rust.
248    let text = text.trim();
249    // And we can consider bytes because all valid numbers for us are ascii.
250    let bytes = text.as_bytes();
251    let Some(last) = bytes.last() else {
252        return Err(Error::new());
253    };
254    // But less flexible in main content.
255    let ok = match last {
256        b'0'..=b'9' => match bytes.iter().position(|&b| b == b'.') {
257            Some(dot) => dot > 0 && (b'0'..=b'9').contains(&bytes[dot - 1]),
258            None => true,
259        },
260        b'N' => text.ends_with("NaN"),
261        b'y' => text.ends_with("Infinity"),
262        _ => false,
263    };
264    match ok {
265        true => text.parse().map_err(|e| Error::with_source(Arc::new(e))),
266        false => Err(Error::new()),
267    }
268}
269
270pub fn to_int(text: impl AsStr, radix: Option<i32>) -> Result<i32> {
271    let text = text.as_str();
272    let radix = radix.unwrap_or(10);
273    if radix < 2 || radix > 36 {
274        // This panics in Rust. TODO Should it panic in Temper?
275        return Err(Error::new());
276    }
277    // See rules here: https://github.com/rust-lang/rust/blob/01e2fff90c7ed19e1d9fb828ebc012e7b9732297/library/core/src/num/mod.rs#L1439
278    i32::from_str_radix(text.trim(), radix as u32).map_err(|e| Error::with_source(Arc::new(e)))
279}
280
281pub fn to_int64(text: impl AsStr, radix: Option<i32>) -> Result<i64> {
282    let text = text.as_str();
283    let radix = radix.unwrap_or(10);
284    if radix < 2 || radix > 36 {
285        // This panics in Rust. TODO Should it panic in Temper?
286        return Err(Error::new());
287    }
288    // See rules here: https://github.com/rust-lang/rust/blob/01e2fff90c7ed19e1d9fb828ebc012e7b9732297/library/core/src/num/mod.rs#L1439
289    i64::from_str_radix(text.trim(), radix as u32).map_err(|e| Error::with_source(Arc::new(e)))
290}