Skip to main content

philiprehberger_str_utils/
lib.rs

//! String manipulation utilities — truncation, case conversion, padding, and whitespace operations.
//!
//! This crate provides a [`StrExt`] trait that extends `&str` and `String` with convenient
//! string manipulation methods.
//!
//! # Usage
//!
//! ```
//! use philiprehberger_str_utils::StrExt;
//!
//! assert_eq!("Hello, World!".truncate_ellipsis(8), "Hello...");
//! assert_eq!("hello world".to_camel_case(), "helloWorld");
//! assert_eq!("hi".pad_left(5, ' '), "   hi");
//! assert_eq!("  hello   world  ".squish(), "hello world");
//! ```
16
17use unicode_width::UnicodeWidthStr;
18
/// Split a string into words by detecting boundaries at underscores, hyphens, spaces,
/// and camelCase transitions.
///
/// Rules, as applied character by character:
///
/// * `_`, `-` and `' '` end the current word and are dropped; runs of delimiters never
///   produce empty words.
/// * An uppercase character that follows a non-uppercase character starts a new word
///   (`helloWorld` -> `hello`, `World`).
/// * Inside a run of uppercase characters, the last one starts a new word only when it is
///   followed by a lowercase character (`HTMLParser` -> `HTML`, `Parser`).
///
/// The original casing of every word is preserved; callers re-case as needed.
fn split_words(s: &str) -> Vec<String> {
    // Collected up front because the acronym rule needs to look one char back and ahead.
    let chars: Vec<char> = s.chars().collect();
    let mut words: Vec<String> = Vec::new();
    let mut current = String::new();

    for (i, &c) in chars.iter().enumerate() {
        // Delimiters: flush the pending word (if any) and skip the delimiter itself.
        if matches!(c, '_' | '-' | ' ') {
            if !current.is_empty() {
                // `take` moves the buffer out instead of cloning it and clearing it.
                words.push(std::mem::take(&mut current));
            }
            continue;
        }

        if c.is_uppercase() && !current.is_empty() {
            // `current` is non-empty, so the previous char was not a delimiter and
            // belongs to the word being built.
            let prev_is_upper = i > 0 && chars[i - 1].is_uppercase();
            let next_is_lower = chars.get(i + 1).map_or(false, |n| n.is_lowercase());

            // lower -> Upper always splits; inside an uppercase run we only split when
            // this char begins a capitalised word ("HTML|Parser").
            if !prev_is_upper || next_is_lower {
                words.push(std::mem::take(&mut current));
            }
        }

        current.push(c);
    }

    if !current.is_empty() {
        words.push(current);
    }

    words
}
82
83/// Extension trait providing string manipulation methods.
84///
85/// Implemented for `&str` and `String`.
86pub trait StrExt {
87    /// Returns the string slice to operate on.
88    fn as_str_ext(&self) -> &str;
89
90    /// Truncate to `max_len` characters, appending "..." if truncated.
91    ///
92    /// Unicode-safe: operates on char boundaries.
93    ///
94    /// ```
95    /// use philiprehberger_str_utils::StrExt;
96    /// assert_eq!("Hello, World!".truncate_ellipsis(8), "Hello...");
97    /// assert_eq!("Short".truncate_ellipsis(10), "Short");
98    /// ```
99    fn truncate_ellipsis(&self, max_len: usize) -> String {
100        self.truncate_with(max_len, "...")
101    }
102
103    /// Truncate to `max_len` characters, appending `suffix` if truncated.
104    ///
105    /// If the string is already within `max_len`, it is returned unchanged.
106    /// The suffix is included in the `max_len` budget.
107    ///
108    /// ```
109    /// use philiprehberger_str_utils::StrExt;
110    /// assert_eq!("Hello, World!".truncate_with(8, "~~"), "Hello,~~");
111    /// ```
112    fn truncate_with(&self, max_len: usize, suffix: &str) -> String {
113        let s = self.as_str_ext();
114        let char_count = s.chars().count();
115        if char_count <= max_len {
116            return s.to_string();
117        }
118        let suffix_len = suffix.chars().count();
119        if max_len <= suffix_len {
120            return suffix.chars().take(max_len).collect();
121        }
122        let keep = max_len - suffix_len;
123        let mut result: String = s.chars().take(keep).collect();
124        result.push_str(suffix);
125        result
126    }
127
128    /// Convert to camelCase.
129    ///
130    /// ```
131    /// use philiprehberger_str_utils::StrExt;
132    /// assert_eq!("hello world".to_camel_case(), "helloWorld");
133    /// assert_eq!("foo_bar".to_camel_case(), "fooBar");
134    /// ```
135    fn to_camel_case(&self) -> String {
136        let words = split_words(self.as_str_ext());
137        let mut result = String::new();
138        for (i, word) in words.iter().enumerate() {
139            if i == 0 {
140                result.push_str(&word.to_lowercase());
141            } else {
142                let mut chars = word.chars();
143                if let Some(first) = chars.next() {
144                    result.extend(first.to_uppercase());
145                    result.push_str(&chars.as_str().to_lowercase());
146                }
147            }
148        }
149        result
150    }
151
152    /// Convert to PascalCase.
153    ///
154    /// ```
155    /// use philiprehberger_str_utils::StrExt;
156    /// assert_eq!("hello world".to_pascal_case(), "HelloWorld");
157    /// ```
158    fn to_pascal_case(&self) -> String {
159        let words = split_words(self.as_str_ext());
160        let mut result = String::new();
161        for word in &words {
162            let mut chars = word.chars();
163            if let Some(first) = chars.next() {
164                result.extend(first.to_uppercase());
165                result.push_str(&chars.as_str().to_lowercase());
166            }
167        }
168        result
169    }
170
171    /// Convert to snake_case.
172    ///
173    /// ```
174    /// use philiprehberger_str_utils::StrExt;
175    /// assert_eq!("helloWorld".to_snake_case(), "hello_world");
176    /// assert_eq!("Hello World".to_snake_case(), "hello_world");
177    /// ```
178    fn to_snake_case(&self) -> String {
179        let words = split_words(self.as_str_ext());
180        words
181            .iter()
182            .map(|w| w.to_lowercase())
183            .collect::<Vec<_>>()
184            .join("_")
185    }
186
187    /// Convert to kebab-case.
188    ///
189    /// ```
190    /// use philiprehberger_str_utils::StrExt;
191    /// assert_eq!("helloWorld".to_kebab_case(), "hello-world");
192    /// ```
193    fn to_kebab_case(&self) -> String {
194        let words = split_words(self.as_str_ext());
195        words
196            .iter()
197            .map(|w| w.to_lowercase())
198            .collect::<Vec<_>>()
199            .join("-")
200    }
201
202    /// Convert to SCREAMING_SNAKE_CASE.
203    ///
204    /// ```
205    /// use philiprehberger_str_utils::StrExt;
206    /// assert_eq!("helloWorld".to_screaming_snake(), "HELLO_WORLD");
207    /// ```
208    fn to_screaming_snake(&self) -> String {
209        let words = split_words(self.as_str_ext());
210        words
211            .iter()
212            .map(|w| w.to_uppercase())
213            .collect::<Vec<_>>()
214            .join("_")
215    }
216
217    /// Convert to Title Case.
218    ///
219    /// ```
220    /// use philiprehberger_str_utils::StrExt;
221    /// assert_eq!("hello world".to_title_case(), "Hello World");
222    /// ```
223    fn to_title_case(&self) -> String {
224        let words = split_words(self.as_str_ext());
225        let mut parts = Vec::new();
226        for word in &words {
227            let mut chars = word.chars();
228            if let Some(first) = chars.next() {
229                let mut titled = String::new();
230                titled.extend(first.to_uppercase());
231                titled.push_str(&chars.as_str().to_lowercase());
232                parts.push(titled);
233            }
234        }
235        parts.join(" ")
236    }
237
238    /// Left-pad the string to `width` using `fill`, based on Unicode display width.
239    ///
240    /// ```
241    /// use philiprehberger_str_utils::StrExt;
242    /// assert_eq!("hi".pad_left(5, ' '), "   hi");
243    /// ```
244    fn pad_left(&self, width: usize, fill: char) -> String {
245        let s = self.as_str_ext();
246        let current_width = UnicodeWidthStr::width(s);
247        if current_width >= width {
248            return s.to_string();
249        }
250        let padding = width - current_width;
251        let mut result = String::new();
252        for _ in 0..padding {
253            result.push(fill);
254        }
255        result.push_str(s);
256        result
257    }
258
259    /// Right-pad the string to `width` using `fill`, based on Unicode display width.
260    ///
261    /// ```
262    /// use philiprehberger_str_utils::StrExt;
263    /// assert_eq!("hi".pad_right(5, '.'), "hi...");
264    /// ```
265    fn pad_right(&self, width: usize, fill: char) -> String {
266        let s = self.as_str_ext();
267        let current_width = UnicodeWidthStr::width(s);
268        if current_width >= width {
269            return s.to_string();
270        }
271        let padding = width - current_width;
272        let mut result = s.to_string();
273        for _ in 0..padding {
274            result.push(fill);
275        }
276        result
277    }
278
279    /// Center-pad the string to `width` using `fill`, based on Unicode display width.
280    ///
281    /// If the padding is odd, the extra character goes on the right.
282    ///
283    /// ```
284    /// use philiprehberger_str_utils::StrExt;
285    /// assert_eq!("hi".pad_center(6, '-'), "--hi--");
286    /// ```
287    fn pad_center(&self, width: usize, fill: char) -> String {
288        let s = self.as_str_ext();
289        let current_width = UnicodeWidthStr::width(s);
290        if current_width >= width {
291            return s.to_string();
292        }
293        let total_padding = width - current_width;
294        let left_padding = total_padding / 2;
295        let right_padding = total_padding - left_padding;
296        let mut result = String::new();
297        for _ in 0..left_padding {
298            result.push(fill);
299        }
300        result.push_str(s);
301        for _ in 0..right_padding {
302            result.push(fill);
303        }
304        result
305    }
306
307    /// Collapse all consecutive whitespace to a single space and trim.
308    ///
309    /// ```
310    /// use philiprehberger_str_utils::StrExt;
311    /// assert_eq!("  hello   world  ".squish(), "hello world");
312    /// ```
313    fn squish(&self) -> String {
314        let s = self.as_str_ext();
315        s.split_whitespace().collect::<Vec<_>>().join(" ")
316    }
317
318    /// Remove common leading whitespace from all non-empty lines.
319    ///
320    /// ```
321    /// use philiprehberger_str_utils::StrExt;
322    /// let text = "    hello\n    world";
323    /// assert_eq!(text.dedent(), "hello\nworld");
324    /// ```
325    fn dedent(&self) -> String {
326        let s = self.as_str_ext();
327        let lines: Vec<&str> = s.lines().collect();
328
329        // Find minimum indentation among non-empty lines
330        let min_indent = lines
331            .iter()
332            .filter(|line| !line.trim().is_empty())
333            .map(|line| line.len() - line.trim_start().len())
334            .min()
335            .unwrap_or(0);
336
337        lines
338            .iter()
339            .map(|line| {
340                if line.len() >= min_indent {
341                    &line[min_indent..]
342                } else {
343                    line.trim()
344                }
345            })
346            .collect::<Vec<_>>()
347            .join("\n")
348    }
349
350    /// Prepend `prefix` to every line.
351    ///
352    /// ```
353    /// use philiprehberger_str_utils::StrExt;
354    /// assert_eq!("hello\nworld".indent("  "), "  hello\n  world");
355    /// ```
356    fn indent(&self, prefix: &str) -> String {
357        let s = self.as_str_ext();
358        s.lines()
359            .map(|line| format!("{}{}", prefix, line))
360            .collect::<Vec<_>>()
361            .join("\n")
362    }
363}
364
365impl StrExt for str {
366    fn as_str_ext(&self) -> &str {
367        self
368    }
369}
370
371impl StrExt for String {
372    fn as_str_ext(&self) -> &str {
373        self.as_str()
374    }
375}
376
/// Unit tests for `split_words` and every `StrExt` method.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Truncation ---

    #[test]
    fn truncate_ellipsis_basic() {
        let shortened = "Hello, World!".truncate_ellipsis(8);
        assert_eq!(shortened, "Hello...");
    }

    #[test]
    fn truncate_ellipsis_no_truncation_needed() {
        let untouched = "Short".truncate_ellipsis(10);
        assert_eq!(untouched, "Short");
    }

    #[test]
    fn truncate_ellipsis_exact_length() {
        let untouched = "Hello".truncate_ellipsis(5);
        assert_eq!(untouched, "Hello");
    }

    #[test]
    fn truncate_ellipsis_empty_string() {
        let empty = "".truncate_ellipsis(5);
        assert_eq!(empty, "");
    }

    #[test]
    fn truncate_ellipsis_max_len_less_than_suffix() {
        // Only two of the three dots fit into the budget.
        let dots = "Hello".truncate_ellipsis(2);
        assert_eq!(dots, "..");
    }

    #[test]
    fn truncate_with_custom_suffix() {
        let shortened = "Hello, World!".truncate_with(7, "~");
        assert_eq!(shortened, "Hello,~");
    }

    #[test]
    fn truncate_unicode_emoji() {
        // The emoji is a single char, so the input is longer than 9 chars.
        // Budget 9 minus the 3-char suffix keeps "Hello " (6 chars).
        let input = "Hello \u{1F600} World";
        assert_eq!(input.truncate_ellipsis(9), "Hello ...");
    }

    #[test]
    fn truncate_cjk() {
        // Four CJK chars.
        let s = "\u{4F60}\u{597D}\u{4E16}\u{754C}";
        // Within budget (10) and exactly at budget (4): unchanged.
        for &limit in &[10usize, 4] {
            assert_eq!(s.truncate_ellipsis(limit), s);
        }
        // 3 - 3 leaves nothing to keep, so only the suffix remains.
        assert_eq!(s.truncate_ellipsis(3), "...");
    }

    #[test]
    fn truncate_single_char() {
        let untouched = "A".truncate_ellipsis(1);
        assert_eq!(untouched, "A");
    }

    // --- Case conversion ---

    #[test]
    fn camel_case_from_spaces() {
        let converted = "hello world".to_camel_case();
        assert_eq!(converted, "helloWorld");
    }

    #[test]
    fn camel_case_from_snake() {
        let converted = "foo_bar".to_camel_case();
        assert_eq!(converted, "fooBar");
    }

    #[test]
    fn camel_case_from_kebab() {
        let converted = "foo-bar-baz".to_camel_case();
        assert_eq!(converted, "fooBarBaz");
    }

    #[test]
    fn camel_case_empty() {
        let converted = "".to_camel_case();
        assert_eq!(converted, "");
    }

    #[test]
    fn pascal_case_basic() {
        let cases = [("hello world", "HelloWorld"), ("foo_bar", "FooBar")];
        for &(input, expected) in &cases {
            assert_eq!(input.to_pascal_case(), expected);
        }
    }

    #[test]
    fn snake_case_from_camel() {
        let converted = "helloWorld".to_snake_case();
        assert_eq!(converted, "hello_world");
    }

    #[test]
    fn snake_case_from_spaces() {
        let converted = "Hello World".to_snake_case();
        assert_eq!(converted, "hello_world");
    }

    #[test]
    fn kebab_case_from_camel() {
        let converted = "helloWorld".to_kebab_case();
        assert_eq!(converted, "hello-world");
    }

    #[test]
    fn kebab_case_from_spaces() {
        let converted = "hello world".to_kebab_case();
        assert_eq!(converted, "hello-world");
    }

    #[test]
    fn screaming_snake_basic() {
        let cases = [("helloWorld", "HELLO_WORLD"), ("foo_bar", "FOO_BAR")];
        for &(input, expected) in &cases {
            assert_eq!(input.to_screaming_snake(), expected);
        }
    }

    #[test]
    fn title_case_basic() {
        let cases = [("hello world", "Hello World"), ("foo_bar", "Foo Bar")];
        for &(input, expected) in &cases {
            assert_eq!(input.to_title_case(), expected);
        }
    }

    #[test]
    fn title_case_single_word() {
        let converted = "hello".to_title_case();
        assert_eq!(converted, "Hello");
    }

    // --- Word splitting edge cases ---

    #[test]
    fn split_words_html_parser() {
        let lowered: Vec<String> = split_words("HTMLParser")
            .iter()
            .map(|word| word.to_lowercase())
            .collect();
        assert_eq!(lowered, ["html", "parser"]);
    }

    #[test]
    fn split_words_get_https_response() {
        let lowered: Vec<String> = split_words("getHTTPSResponse")
            .iter()
            .map(|word| word.to_lowercase())
            .collect();
        assert_eq!(lowered, ["get", "https", "response"]);
    }

    #[test]
    fn split_words_all_caps() {
        assert_eq!(split_words("HTTP"), ["HTTP"]);
    }

    #[test]
    fn split_words_mixed_delimiters() {
        assert_eq!(split_words("foo_bar-baz qux"), ["foo", "bar", "baz", "qux"]);
    }

    // --- Padding ---

    #[test]
    fn pad_left_basic() {
        let padded = "hi".pad_left(5, ' ');
        assert_eq!(padded, "   hi");
    }

    #[test]
    fn pad_left_no_padding_needed() {
        let padded = "hello".pad_left(3, ' ');
        assert_eq!(padded, "hello");
    }

    #[test]
    fn pad_right_basic() {
        let padded = "hi".pad_right(5, '.');
        assert_eq!(padded, "hi...");
    }

    #[test]
    fn pad_center_even() {
        let padded = "hi".pad_center(6, '-');
        assert_eq!(padded, "--hi--");
    }

    #[test]
    fn pad_center_odd() {
        // The extra fill character lands on the right.
        let padded = "hi".pad_center(7, '-');
        assert_eq!(padded, "--hi---");
    }

    #[test]
    fn pad_empty_string() {
        let expected = "***";
        assert_eq!("".pad_left(3, '*'), expected);
        assert_eq!("".pad_right(3, '*'), expected);
        assert_eq!("".pad_center(3, '*'), expected);
    }

    #[test]
    fn pad_cjk_width() {
        // Two CJK chars occupy four display columns, leaving two columns of padding.
        let s = "\u{4F60}\u{597D}";
        let right = s.pad_right(6, '.');
        let left = s.pad_left(6, '.');
        assert_eq!(right, "\u{4F60}\u{597D}..");
        assert_eq!(left, "..\u{4F60}\u{597D}");
    }

    // --- Whitespace utilities ---

    #[test]
    fn squish_basic() {
        let squished = "  hello   world  ".squish();
        assert_eq!(squished, "hello world");
    }

    #[test]
    fn squish_tabs_and_newlines() {
        let squished = "hello\t\t  world\n\nfoo".squish();
        assert_eq!(squished, "hello world foo");
    }

    #[test]
    fn squish_empty() {
        let squished = "".squish();
        assert_eq!(squished, "");
    }

    #[test]
    fn squish_only_whitespace() {
        let squished = "   \t\n  ".squish();
        assert_eq!(squished, "");
    }

    #[test]
    fn dedent_basic() {
        let dedented = "    hello\n    world".dedent();
        assert_eq!(dedented, "hello\nworld");
    }

    #[test]
    fn dedent_mixed_indent() {
        let dedented = "    hello\n      world".dedent();
        assert_eq!(dedented, "hello\n  world");
    }

    #[test]
    fn dedent_with_empty_lines() {
        let dedented = "    hello\n\n    world".dedent();
        assert_eq!(dedented, "hello\n\nworld");
    }

    #[test]
    fn dedent_no_indent() {
        let dedented = "hello\nworld".dedent();
        assert_eq!(dedented, "hello\nworld");
    }

    #[test]
    fn indent_basic() {
        let indented = "hello\nworld".indent("  ");
        assert_eq!(indented, "  hello\n  world");
    }

    #[test]
    fn indent_single_line() {
        let indented = "hello".indent(">>> ");
        assert_eq!(indented, ">>> hello");
    }

    #[test]
    fn indent_empty_string() {
        // An empty string has no lines, so there is nothing to prefix.
        let indented = "".indent("  ");
        assert_eq!(indented, "");
    }

    // --- String type support ---

    #[test]
    fn works_with_string_type() {
        let owned = String::from("hello world");
        let camel = owned.to_camel_case();
        let squished = owned.squish();
        let padded = owned.pad_left(15, '.');
        assert_eq!(camel, "helloWorld");
        assert_eq!(squished, "hello world");
        assert_eq!(padded, "....hello world");
    }
}