substring/lib.rs
1//! Substring method for string types.
2//!
3//! This crate provides a [`substring()`] method on Rust string types. The method takes a start and
4//! end character index and returns a string slice of the characters within that range.
5//!
6//! The method is provided via the [`Substring`] trait which is implemented on the [`str`]
7//! primitive.
8//!
9//! # Example
10//! ```
11//! use substring::Substring;
12//!
13//! // Works on a string slice.
14//! assert_eq!("foobar".substring(2,5), "oba");
15//!
16//! // Also works on a String.
17//! assert_eq!("foobar".to_string().substring(1,6), "oobar");
18//! ```
19//!
20//! As Rust strings are UTF-8 encoded, the algorithm for finding a character substring is `O(n)`,
//! where `n` is the byte length of the string. This is due to characters not being of predictable
22//! byte lengths.
23//!
24//! # Note
25//! The indexing of substrings is based on [*Unicode Scalar Value*]. As such, substrings may not
26//! always match your intuition:
27//!
28//! ```
29//! use substring::Substring;
30//!
31//! assert_eq!("ã".substring(0, 1), "a"); // As opposed to "ã".
32//! assert_eq!("ã".substring(1, 2), "\u{0303}")
33//! ```
34//!
//! The above example occurs because "ã" is technically made up of two Unicode scalar values.
36//!
37//! [`str`]: https://doc.rust-lang.org/std/primitive.str.html
38//! [`Substring`]: trait.Substring.html
39//! [`substring()`]: trait.Substring.html#tymethod.substring
40//!
41//! [*Unicode Scalar Value*]: http://www.unicode.org/glossary/#unicode_scalar_value
42
43#![deny(missing_docs)]
44// Since the MSRV is 1.0.0, allowing usage of deprecated items is ok, as the replacements are likely
45// not available in early versions.
46#![allow(deprecated)]
47#![cfg_attr(rustc_1_6, no_std)]
48
49#[cfg(not(rustc_1_6))]
50extern crate std as core;
51
/// Provides a [`substring()`] method.
///
/// The [`substring()`] method obtains a string slice of characters within the range specified by
/// `start_index` and `end_index`.
///
/// [`substring()`]: trait.Substring.html#tymethod.substring
pub trait Substring {
    /// Obtains a string slice containing the characters within the range specified by
    /// `start_index` and `end_index`.
    ///
    /// The range specified is a character range, not a byte range. `start_index` is the index of
    /// the first character included in the result and `end_index` is the index of the first
    /// character excluded from it.
    ///
    /// # Example
    /// ```
    /// use substring::Substring;
    ///
    /// assert_eq!("foobar".substring(2,5), "oba");
    /// ```
    // `#[must_use]` is placed on the declaration because that is where the compiler honours it
    // for trait methods; the same attribute on a method inside an `impl Trait for Type` block is
    // ignored.
    #[must_use]
    fn substring(&self, start_index: usize, end_index: usize) -> &str;
}
65
66/// Implements a [`substring()`] method for [`str`].
67///
68/// Note that structs which implement `Deref<Target=str>` (such as [`String`]) will also have
69/// access to this implementation.
70///
71/// [`str`]: https://doc.rust-lang.org/std/primitive.str.html
72/// [`String`]: https://doc.rust-lang.org/std/string/struct.String.html
73/// [`substring()`]: trait.Substring.html#method.substring
74impl Substring for str {
75 /// Obtain a slice of the characters within the range of `start_index` and `end_index`.
76 ///
77 /// As this is by character index, rather than byte index, the temporal complexity of finding a
78 /// substring is *O(n)*, where *n* is the byte length of the string.
79 ///
80 /// Example:
81 /// ```
82 /// use substring::Substring;
83 ///
84 /// assert_eq!("foobar".substring(2,5), "oba");
85 /// ```
86 #[must_use]
87 fn substring(&self, start_index: usize, end_index: usize) -> &str {
88 if end_index <= start_index {
89 return "";
90 }
91
92 let mut indices = self.char_indices();
93
94 let obtain_index = |(index, _char)| index;
95 let str_len = self.len();
96
97 unsafe {
98 // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
99 // that the indices obtained from it will always be within the bounds of `self` and they
100 // will always lie on UTF-8 sequence boundaries.
101 self.slice_unchecked(
102 indices.nth(start_index).map_or(str_len, &obtain_index),
103 indices
104 .nth(end_index - start_index - 1)
105 .map_or(str_len, &obtain_index),
106 )
107 }
108 }
109}
110
#[cfg(test)]
mod tests {
    // `super::` names the parent module explicitly, so this import resolves the same way
    // regardless of the edition the crate is compiled under.
    use super::Substring;

    /// A plain ASCII range starting at the first character.
    #[test]
    fn test_substring() {
        assert_eq!("foobar".substring(0, 3), "foo");
    }

    /// Indices past the last character are treated as the end of the string.
    #[test]
    fn test_out_of_bounds() {
        assert_eq!("foobar".substring(0, 10), "foobar");
        assert_eq!("foobar".substring(6, 10), "");
        assert_eq!("foobar".substring(3, 10), "bar");
    }

    /// An inverted range yields an empty slice.
    #[test]
    fn test_start_less_than_end() {
        assert_eq!("foobar".substring(3, 2), "");
    }

    /// A zero-width range yields an empty slice.
    #[test]
    fn test_start_and_end_equal() {
        assert_eq!("foobar".substring(3, 3), "");
    }

    /// Indices count characters, not bytes.
    #[test]
    fn test_multiple_byte_characters() {
        assert_eq!("fõøbα®".substring(2, 5), "øbα");
    }

    /// Any range over an empty string is empty.
    #[test]
    fn test_empty_string() {
        assert_eq!("".substring(0, 1), "");
    }

    /// A base character followed by a combining mark counts as two characters.
    #[test]
    fn test_combining_mark_is_separate_character() {
        assert_eq!("a\u{0303}".substring(0, 1), "a");
        assert_eq!("a\u{0303}".substring(1, 2), "\u{0303}");
    }
}
140}