pub struct StringAccessor<'a> { /* private fields */ }Expand description
Created by Series::str(). Provides string manipulation methods
analogous to pandas Series.str namespace.
Implementations§
Source§impl StringAccessor<'_>
impl StringAccessor<'_>
Sourcepub fn lower(&self) -> Result<Series, FrameError>
pub fn lower(&self) -> Result<Series, FrameError>
Convert strings to lowercase.
Sourcepub fn upper(&self) -> Result<Series, FrameError>
pub fn upper(&self) -> Result<Series, FrameError>
Convert strings to uppercase.
Sourcepub fn strip(&self) -> Result<Series, FrameError>
pub fn strip(&self) -> Result<Series, FrameError>
Strip leading and trailing whitespace.
Sourcepub fn strip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
pub fn strip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
Strip leading and trailing characters in to_strip.
Matches pd.Series.str.strip(chars).
Sourcepub fn lstrip(&self) -> Result<Series, FrameError>
pub fn lstrip(&self) -> Result<Series, FrameError>
Strip leading whitespace.
Sourcepub fn lstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
pub fn lstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
Strip leading characters in to_strip.
Matches pd.Series.str.lstrip(chars).
Sourcepub fn rstrip(&self) -> Result<Series, FrameError>
pub fn rstrip(&self) -> Result<Series, FrameError>
Strip trailing whitespace.
Sourcepub fn rstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
pub fn rstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>
Strip trailing characters in to_strip.
Matches pd.Series.str.rstrip(chars).
Sourcepub fn contains(&self, pat: &str) -> Result<Series, FrameError>
pub fn contains(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string contains a pattern.
Sourcepub fn contains_any(&self, pats: &[&str]) -> Result<Series, FrameError>
pub fn contains_any(&self, pats: &[&str]) -> Result<Series, FrameError>
Check whether each string contains any of the given patterns.
Matches pd.Series.str.contains('|'.join(pats), regex=True) but for literals.
Examples found in repository?
25fn golden() -> String {
26 let mut out = String::new();
27 let s = s_from(vec!["hello world", "foobar", "BAZ qux", "", "a.b+c"]);
28
29 let r = s.str().contains_any(&["world", "qux"]).unwrap();
30 out.push_str(&format!("hit={:?}\n", r.values()));
31
32 // Regex metacharacters must be treated as LITERALS.
33 let r2 = s.str().contains_any(&["a.b+c", "zz"]).unwrap();
34 out.push_str(&format!("literal_meta={:?}\n", r2.values()));
35
36 // Case sensitive (no match for lowercase 'baz').
37 let r3 = s.str().contains_any(&["baz"]).unwrap();
38 out.push_str(&format!("case={:?}\n", r3.values()));
39
40 // Empty pattern set => all false.
41 let r4 = s.str().contains_any(&[]).unwrap();
42 out.push_str(&format!("empty={:?}\n", r4.values()));
43
44 // Empty-string pattern matches every (non-null) string.
45 let r5 = s.str().contains_any(&["", "zz"]).unwrap();
46 out.push_str(&format!("empty_pat={:?}\n", r5.values()));
47 out
48}
49
50fn main() {
51 let g = golden();
52 print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
53
54 // Many patterns, mostly-missing, over many medium strings (worst case:
55 // every pattern scanned per string in the naive path).
56 let pats_owned: Vec<String> = (0..200).map(|i| format!("needle{i:04}xyz")).collect();
57 let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
58 let base = "the quick brown fox jumps over the lazy dog ".repeat(4);
59 let n = 20_000;
60 let s = s_from(vec![base.as_str(); n]);
61
62 // warmup
63 let _ = s.str().contains_any(&pats).unwrap();
64
65 let t = Instant::now();
66 let r = s.str().contains_any(&pats).unwrap();
67 let d = t.elapsed();
68 assert_eq!(r.len(), n);
69
70 println!(
71 "TIMING n={n} npats={} contains_any={:.3}ms",
72 pats.len(),
73 d.as_secs_f64() * 1e3
74 );
75}Sourcepub fn contains_with_options(
&self,
pat: &str,
case: bool,
na: Option<bool>,
regex: bool,
) -> Result<Series, FrameError>
pub fn contains_with_options( &self, pat: &str, case: bool, na: Option<bool>, regex: bool, ) -> Result<Series, FrameError>
Check whether each string contains pat with case/na/regex options.
Matches pd.Series.str.contains(pat, case=True, na=None, regex=True):
- regex=true treats pat as a regular expression; regex=false treats it as a literal substring search.
- case=false makes the match case-insensitive.
- na=None propagates nulls as NaN; na=Some(bool) replaces null entries with that boolean, matching pandas’ fill-on-NaN behavior.
Sourcepub fn replace(&self, pat: &str, repl: &str) -> Result<Series, FrameError>
pub fn replace(&self, pat: &str, repl: &str) -> Result<Series, FrameError>
Replace occurrences of a pattern with a replacement string.
Sourcepub fn replace_with_options(
&self,
pat: &str,
repl: &str,
n: Option<usize>,
case: bool,
regex: bool,
) -> Result<Series, FrameError>
pub fn replace_with_options( &self, pat: &str, repl: &str, n: Option<usize>, case: bool, regex: bool, ) -> Result<Series, FrameError>
Replace with pandas-parity case/regex/n options.
Matches pd.Series.str.replace(pat, repl, n=-1, case=True, regex=True):
- regex=true compiles pat as a regex; regex=false treats it as a literal substring.
- case=false performs case-insensitive matching. For literal mode this is implemented via case-folded find scanning.
- n=None (pandas -1) replaces every occurrence. n=Some(k) caps at the first k replacements per cell.
Sourcepub fn startswith(&self, pat: &str) -> Result<Series, FrameError>
pub fn startswith(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string starts with a prefix.
Sourcepub fn startswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>
pub fn startswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>
Check whether each string starts with any of the given prefixes.
Matches pd.Series.str.startswith((p1, p2, ...)).
Examples found in repository?
25fn golden() -> String {
26 let mut out = String::new();
27 let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29 let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30 out.push_str(&format!("sw_hit={:?}\n", r.values()));
31 // metacharacters stay literal
32 let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33 out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34 let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35 out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36 // empty pattern set => all false
37 let r4 = s.str().startswith_any(&[]).unwrap();
38 out.push_str(&format!("empty={:?}\n", r4.values()));
39 // empty-string pattern matches every non-null string
40 let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41 out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43 // with_na variants (null fill).
44 let sn = Series::from_values(
45 "s",
46 vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47 vec![
48 Scalar::Utf8("https://z".into()),
49 Scalar::Null(fp_types::NullKind::NaN),
50 ],
51 )
52 .unwrap();
53 let rn = sn
54 .str()
55 .startswith_any_with_na(&["https://"], Some(true))
56 .unwrap();
57 out.push_str(&format!("na_fill={:?}\n", rn.values()));
58 out
59}
60
61fn main() {
62 let g = golden();
63 print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
64
65 // Worst case: many prefixes sharing a long common stem.
66 let pats_owned: Vec<String> = (0..200)
67 .map(|i| format!("https://cdn.example.com/path/{i:04}/"))
68 .collect();
69 let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
70 let one = "https://cdn.example.com/path/9999/asset/file/deep/name.bin";
71 let n = 40_000;
72 let s = s_from(vec![one; n]);
73
74 let _ = s.str().startswith_any(&pats).unwrap(); // warmup
75
76 let t = Instant::now();
77 let r = s.str().startswith_any(&pats).unwrap();
78 let d = t.elapsed();
79 assert_eq!(r.len(), n);
80
81 println!(
82 "TIMING n={n} npats={} startswith_any={:.3}ms",
83 pats.len(),
84 d.as_secs_f64() * 1e3
85 );
86}Sourcepub fn startswith_with_na(
&self,
pat: &str,
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn startswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>
Check whether each string starts with pat, replacing nulls with na.
Matches pd.Series.str.startswith(pat, na=...). When na is
None (Rust None), nulls propagate as NaN — same as the
default pandas behavior on 2.2+. When na is Some(true) /
Some(false), null entries are replaced with that boolean.
Sourcepub fn startswith_any_with_na(
&self,
pats: &[&str],
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn startswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>
Check whether each string starts with any of pats, replacing nulls with na.
Matches pd.Series.str.startswith((p1, p2, ...), na=...).
Examples found in repository?
25fn golden() -> String {
26 let mut out = String::new();
27 let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29 let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30 out.push_str(&format!("sw_hit={:?}\n", r.values()));
31 // metacharacters stay literal
32 let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33 out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34 let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35 out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36 // empty pattern set => all false
37 let r4 = s.str().startswith_any(&[]).unwrap();
38 out.push_str(&format!("empty={:?}\n", r4.values()));
39 // empty-string pattern matches every non-null string
40 let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41 out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43 // with_na variants (null fill).
44 let sn = Series::from_values(
45 "s",
46 vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47 vec![
48 Scalar::Utf8("https://z".into()),
49 Scalar::Null(fp_types::NullKind::NaN),
50 ],
51 )
52 .unwrap();
53 let rn = sn
54 .str()
55 .startswith_any_with_na(&["https://"], Some(true))
56 .unwrap();
57 out.push_str(&format!("na_fill={:?}\n", rn.values()));
58 out
59}Sourcepub fn endswith(&self, pat: &str) -> Result<Series, FrameError>
pub fn endswith(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string ends with a suffix.
Sourcepub fn endswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>
pub fn endswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>
Check whether each string ends with any of the given suffixes.
Matches pd.Series.str.endswith((s1, s2, ...)).
Examples found in repository?
25fn golden() -> String {
26 let mut out = String::new();
27 let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29 let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30 out.push_str(&format!("sw_hit={:?}\n", r.values()));
31 // metacharacters stay literal
32 let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33 out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34 let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35 out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36 // empty pattern set => all false
37 let r4 = s.str().startswith_any(&[]).unwrap();
38 out.push_str(&format!("empty={:?}\n", r4.values()));
39 // empty-string pattern matches every non-null string
40 let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41 out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43 // with_na variants (null fill).
44 let sn = Series::from_values(
45 "s",
46 vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47 vec![
48 Scalar::Utf8("https://z".into()),
49 Scalar::Null(fp_types::NullKind::NaN),
50 ],
51 )
52 .unwrap();
53 let rn = sn
54 .str()
55 .startswith_any_with_na(&["https://"], Some(true))
56 .unwrap();
57 out.push_str(&format!("na_fill={:?}\n", rn.values()));
58 out
59}Sourcepub fn endswith_with_na(
&self,
pat: &str,
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn endswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>
Check whether each string ends with pat, replacing nulls with na.
Matches pd.Series.str.endswith(pat, na=...).
Sourcepub fn endswith_any_with_na(
&self,
pats: &[&str],
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn endswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>
Check whether each string ends with any of pats, replacing nulls with na.
Matches pd.Series.str.endswith((s1, s2, ...), na=...).
Sourcepub fn len(&self) -> Result<Series, FrameError>
pub fn len(&self) -> Result<Series, FrameError>
Get the length of each string (character count, not byte count).
Per br-frankenpandas-rg8ys.6.6: pandas returns float64 (not int64) to represent nullable integers. Nulls become NaN.
Sourcepub fn slice(
&self,
start: Option<i64>,
stop: Option<i64>,
step: Option<i64>,
) -> Result<Series, FrameError>
pub fn slice( &self, start: Option<i64>, stop: Option<i64>, step: Option<i64>, ) -> Result<Series, FrameError>
Slice each string from start to stop, with an optional step.
Sourcepub fn slice_replace(
&self,
start: Option<i64>,
stop: Option<i64>,
repl: &str,
) -> Result<Series, FrameError>
pub fn slice_replace( &self, start: Option<i64>, stop: Option<i64>, repl: &str, ) -> Result<Series, FrameError>
Replace a positional slice of each string.
Matches pd.Series.str.slice_replace(start, stop, repl), including
Python-style NEGATIVE start/stop. The replaced span is chars[start..]
up to chars[max(start, stop)..], i.e. s[:start] + repl + s[stop:]
with stop never preceding start. Verified vs live pandas 2.2.3:
“abcde”.slice_replace(-2, None, “X”) -> “abcX”;
.slice_replace(0, -1, “X”) -> “Xe”; .slice_replace(3, 1, “X”) -> “abcXde”.
Sourcepub fn split_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
pub fn split_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
Split each string by a separator and return the n-th element.
Sourcepub fn rsplit_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
pub fn rsplit_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
Split each string from the right and return the n-th element.
Matches pd.Series.str.rsplit(pat).str[n].
Sourcepub fn split_df(&self, pat: &str) -> Result<DataFrame, FrameError>
pub fn split_df(&self, pat: &str) -> Result<DataFrame, FrameError>
Split each string by a separator and return a DataFrame.
Matches pd.Series.str.split(pat, expand=True).
Sourcepub fn split_df_n(
&self,
pat: &str,
n: Option<usize>,
) -> Result<DataFrame, FrameError>
pub fn split_df_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>
Split each string with a maximum number of splits, returning a DataFrame.
Matches pd.Series.str.split(pat, n=..., expand=True). n=None
means split on every occurrence (default behavior); n=Some(k)
caps the result at k + 1 parts by doing at most k splits.
Sourcepub fn rsplit_df(
&self,
pat: &str,
n: Option<usize>,
) -> Result<DataFrame, FrameError>
pub fn rsplit_df( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>
Split each string from the right and return a DataFrame.
Matches pd.Series.str.rsplit(pat, expand=True).
Sourcepub fn split_count(&self, pat: &str) -> Result<Series, FrameError>
pub fn split_count(&self, pat: &str) -> Result<Series, FrameError>
Count the number of parts when splitting by pattern.
Matches pd.Series.str.split(pat).str.len(). Returns Int64 count.
Sourcepub fn join(&self, from: &str, sep: &str) -> Result<Series, FrameError>
pub fn join(&self, from: &str, sep: &str) -> Result<Series, FrameError>
Join/concatenate each string element with a separator.
Applies to each element: useful after conceptual split operations.
Replaces occurrences of from with sep.
Sourcepub fn capitalize(&self) -> Result<Series, FrameError>
pub fn capitalize(&self) -> Result<Series, FrameError>
Capitalize the first character and lowercase the rest (pandas behavior).
Sourcepub fn title(&self) -> Result<Series, FrameError>
pub fn title(&self) -> Result<Series, FrameError>
Title case each string.
Sourcepub fn pad(
&self,
width: usize,
side: &str,
fillchar: char,
) -> Result<Series, FrameError>
pub fn pad( &self, width: usize, side: &str, fillchar: char, ) -> Result<Series, FrameError>
Pad strings to a minimum width with a fill character.
Sourcepub fn contains_regex(&self, pat: &str) -> Result<Series, FrameError>
pub fn contains_regex(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string matches a regex pattern.
Analogous to pandas.Series.str.contains(pat, regex=True).
Sourcepub fn replace_regex(&self, pat: &str, repl: &str) -> Result<Series, FrameError>
pub fn replace_regex(&self, pat: &str, repl: &str) -> Result<Series, FrameError>
Replace first occurrence of a regex pattern with a replacement string.
Analogous to pandas.Series.str.replace(pat, repl, n=1, regex=True).
The replacement string supports backreferences ($1, $2, etc.).
Sourcepub fn replace_regex_all(
&self,
pat: &str,
repl: &str,
) -> Result<Series, FrameError>
pub fn replace_regex_all( &self, pat: &str, repl: &str, ) -> Result<Series, FrameError>
Replace all occurrences of a regex pattern with a replacement string.
Analogous to pandas.Series.str.replace(pat, repl, regex=True, n=-1).
Sourcepub fn extract(&self, pat: &str) -> Result<Series, FrameError>
pub fn extract(&self, pat: &str) -> Result<Series, FrameError>
Extract the first match of a regex capture group.
Analogous to pandas.Series.str.extract(pat, expand=False) for a
single-group pattern. Returns the first capture group (group 1) if
the pattern contains a group, otherwise returns the full match
(group 0). Non-matching strings produce Null. If the pattern’s
single capture group is named, the group name is used as the
Series name (matching pandas 2.2 behavior).
Sourcepub fn extract_df(&self, pat: &str) -> Result<DataFrame, FrameError>
pub fn extract_df(&self, pat: &str) -> Result<DataFrame, FrameError>
Extract multiple capture groups as a DataFrame.
Matches pd.Series.str.extract(pat) when pat has multiple groups.
Sourcepub fn count(&self, pat: &str) -> Result<Series, FrameError>
pub fn count(&self, pat: &str) -> Result<Series, FrameError>
Count occurrences of pattern in each string.
Matches pd.Series.str.count(pat).
Sourcepub fn extract_to_frame(&self, pat: &str) -> Result<DataFrame, FrameError>
pub fn extract_to_frame(&self, pat: &str) -> Result<DataFrame, FrameError>
Extract capture groups from a regex, returning a DataFrame.
Analogous to pandas.Series.str.extract(pat, expand=True). Returns
a DataFrame where each column corresponds to a capture group. If the
pattern has named groups (?P<name>...), those names become column
names; otherwise columns are numbered “0”, “1”, etc. Non-matching
rows produce NaN in every group column.
Sourcepub fn extractall(&self, pat: &str) -> Result<DataFrame, FrameError>
pub fn extractall(&self, pat: &str) -> Result<DataFrame, FrameError>
Extract all matches of a regex capture group, returning a DataFrame.
Analogous to pandas.Series.str.extractall(pat). Returns a DataFrame
where each row is a match, with a composite index “original_idx, match_n”.
If the pattern contains capture groups, returns one column per group.
Named capture groups ((?P<name>...) / (?<name>...)) produce columns
with that name; unnamed groups use positional names 0, 1, ....
Sourcepub fn split_expand(&self, pat: &str) -> Result<DataFrame, FrameError>
pub fn split_expand(&self, pat: &str) -> Result<DataFrame, FrameError>
Split strings by pattern, expanding into a DataFrame.
Analogous to pandas.Series.str.split(pat, expand=True).
Returns a DataFrame where each column is a split part.
Shorter splits are padded with NaN.
Sourcepub fn split_expand_n(
&self,
pat: &str,
n: Option<usize>,
) -> Result<DataFrame, FrameError>
pub fn split_expand_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>
Split strings by pattern with an optional split limit, expanding into a DataFrame.
Analogous to pandas.Series.str.split(pat, n=..., expand=True).
Shorter splits are padded with NaN.
Sourcepub fn count_matches(&self, pat: &str) -> Result<Series, FrameError>
pub fn count_matches(&self, pat: &str) -> Result<Series, FrameError>
Count non-overlapping matches of a regex pattern in each string.
Analogous to pandas.Series.str.count(pat).
Sourcepub fn count_literal(&self, pat: &str) -> Result<Series, FrameError>
pub fn count_literal(&self, pat: &str) -> Result<Series, FrameError>
Count non-overlapping occurrences of a literal substring.
Matches pd.Series.str.count(pat) for literal patterns.
Sourcepub fn findall(&self, pat: &str, sep: &str) -> Result<Series, FrameError>
pub fn findall(&self, pat: &str, sep: &str) -> Result<Series, FrameError>
Find all non-overlapping matches and return them joined by a separator.
Since Series cannot hold list values, matches are joined with sep.
Non-matching strings produce Null.
Sourcepub fn fullmatch(&self, pat: &str) -> Result<Series, FrameError>
pub fn fullmatch(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string fully matches a regex pattern.
Unlike contains_regex which searches for a match anywhere,
this requires the entire string to match (anchored ^...$).
Sourcepub fn fullmatch_with_options(
&self,
pat: &str,
case: bool,
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn fullmatch_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>
Fullmatch with case/na option parity.
Matches pd.Series.str.fullmatch(pat, case=True, na=None).
case=false enables inline (?i) matching. na=None propagates
nulls as NaN; na=Some(bool) replaces null entries with the
chosen boolean.
Sourcepub fn match_regex(&self, pat: &str) -> Result<Series, FrameError>
pub fn match_regex(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string matches a regex pattern at the start.
Analogous to pandas.Series.str.match(pat) which uses Python’s
re.match() (anchored at the beginning of the string).
Sourcepub fn match(&self, pat: &str) -> Result<Series, FrameError>
pub fn match(&self, pat: &str) -> Result<Series, FrameError>
Check whether each string matches a regex pattern at the start.
Rust raw-identifier spelling for pandas.Series.str.match(pat).
Sourcepub fn match_regex_with_options(
&self,
pat: &str,
case: bool,
na: Option<bool>,
) -> Result<Series, FrameError>
pub fn match_regex_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>
Match (start-anchored) with case/na option parity.
Matches pd.Series.str.match(pat, case=True, na=None) — Python’s
re.match semantics (anchored at start, not end). case=false
enables inline (?i) matching; na handling mirrors
fullmatch_with_options.
Sourcepub fn split_regex_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
pub fn split_regex_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>
Split each string by a regex pattern and return the n-th element.
Analogous to pandas.Series.str.split(pat, regex=True).str[n].
Sourcepub fn zfill(&self, width: usize) -> Result<Series, FrameError>
pub fn zfill(&self, width: usize) -> Result<Series, FrameError>
Zero-fill strings to specified width.
Matches pd.Series.str.zfill(width).
Sourcepub fn center(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
pub fn center(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
Center-align strings within specified width.
Matches pd.Series.str.center(width, fillchar).
Sourcepub fn ljust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
pub fn ljust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
Left-align strings within specified width (pad on right).
Matches pd.Series.str.ljust(width, fillchar).
Sourcepub fn rjust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
pub fn rjust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>
Right-align strings within specified width (pad on left).
Matches pd.Series.str.rjust(width, fillchar).
Sourcepub fn isdigit(&self) -> Result<Series, FrameError>
pub fn isdigit(&self) -> Result<Series, FrameError>
Check if each string is composed of digits only.
Matches pd.Series.str.isdigit().
Sourcepub fn isalpha(&self) -> Result<Series, FrameError>
pub fn isalpha(&self) -> Result<Series, FrameError>
Check if each string is composed of alphabetic characters only.
Matches pd.Series.str.isalpha().
Sourcepub fn isalnum(&self) -> Result<Series, FrameError>
pub fn isalnum(&self) -> Result<Series, FrameError>
Check if each string is alphanumeric.
Matches pd.Series.str.isalnum().
Sourcepub fn isascii(&self) -> Result<Series, FrameError>
pub fn isascii(&self) -> Result<Series, FrameError>
Check if each string is composed of ASCII characters only.
Matches pd.Series.str.isascii().
Sourcepub fn isspace(&self) -> Result<Series, FrameError>
pub fn isspace(&self) -> Result<Series, FrameError>
Check if each string is composed of whitespace only.
Matches pd.Series.str.isspace().
Sourcepub fn islower(&self) -> Result<Series, FrameError>
pub fn islower(&self) -> Result<Series, FrameError>
Check if each string is lowercase.
Matches pd.Series.str.islower().
Sourcepub fn isupper(&self) -> Result<Series, FrameError>
pub fn isupper(&self) -> Result<Series, FrameError>
Check if each string is uppercase.
Matches pd.Series.str.isupper().
Sourcepub fn isnumeric(&self) -> Result<Series, FrameError>
pub fn isnumeric(&self) -> Result<Series, FrameError>
Check if each string is numeric (including Unicode numeric chars).
Matches pd.Series.str.isnumeric().
Sourcepub fn get(&self, i: i64) -> Result<Series, FrameError>
pub fn get(&self, i: i64) -> Result<Series, FrameError>
Extract character at position from each string.
Matches pd.Series.str.get(i). Returns NaN if index is out of bounds.
Sourcepub fn wrap(&self, width: usize) -> Result<Series, FrameError>
pub fn wrap(&self, width: usize) -> Result<Series, FrameError>
Wrap long lines at specified width.
Matches pd.Series.str.wrap(width). Inserts newlines to wrap text.
Sourcepub fn wrap_with_drop_whitespace(
&self,
width: usize,
drop_whitespace: bool,
) -> Result<Series, FrameError>
pub fn wrap_with_drop_whitespace( &self, width: usize, drop_whitespace: bool, ) -> Result<Series, FrameError>
Wrap long lines while controlling whether boundary whitespace is dropped.
Matches pd.Series.str.wrap(width, drop_whitespace=...) for the
supported strict-mode behavior covered by the conformance fixtures.
Sourcepub fn normalize(&self, form: &str) -> Result<Series, FrameError>
pub fn normalize(&self, form: &str) -> Result<Series, FrameError>
Normalize Unicode strings.
Matches pd.Series.str.normalize(form) for NFC, NFKC, NFD, and NFKD.
Sourcepub fn isdecimal(&self) -> Result<Series, FrameError>
pub fn isdecimal(&self) -> Result<Series, FrameError>
Check if each string is composed of decimal characters only.
Matches pd.Series.str.isdecimal().
Sourcepub fn istitle(&self) -> Result<Series, FrameError>
pub fn istitle(&self) -> Result<Series, FrameError>
Check if each string is titlecased.
Matches pd.Series.str.istitle().
Sourcepub fn cat(&self, sep: &str) -> Result<String, FrameError>
pub fn cat(&self, sep: &str) -> Result<String, FrameError>
Concatenate all strings in the Series into a single string.
Matches pd.Series.str.cat(sep=...) called without others: the elements
are joined with the separator sep and returned as one String.
Sourcepub fn cat_series(
&self,
others: &Series,
sep: &str,
na_rep: Option<&str>,
) -> Result<Series, FrameError>
pub fn cat_series( &self, others: &Series, sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>
Concatenate strings element-wise with another Series.
Matches pd.Series.str.cat(others, sep, na_rep).
na_rep is used to replace Null values in either Series.
Sourcepub fn cat_list(
&self,
others: &[&Series],
sep: &str,
na_rep: Option<&str>,
) -> Result<Series, FrameError>
pub fn cat_list( &self, others: &[&Series], sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>
Concatenate strings element-wise with multiple other Series.
Matches pd.Series.str.cat([s1, s2, ...], sep, na_rep).
Sourcepub fn find(&self, sub: &str) -> Result<Series, FrameError>
pub fn find(&self, sub: &str) -> Result<Series, FrameError>
Find the first occurrence of a substring.
Matches pd.Series.str.find(sub). Returns -1 if not found.
Per br-frankenpandas-02ae2b: pandas returns CHAR-based positions
(Python 3 strings are char-indexed), not byte positions.
Sourcepub fn rfind(&self, sub: &str) -> Result<Series, FrameError>
pub fn rfind(&self, sub: &str) -> Result<Series, FrameError>
Find the last occurrence of a substring.
Matches pd.Series.str.rfind(sub). Returns -1 if not found.
Per br-frankenpandas-02ae2b: char-based, not byte-based.
Sourcepub fn index(&self, sub: &str) -> Result<Series, FrameError>
pub fn index(&self, sub: &str) -> Result<Series, FrameError>
Find the first occurrence of a substring; error if any non-null string misses it.
Matches pd.Series.str.index(sub).
Sourcepub fn rindex(&self, sub: &str) -> Result<Series, FrameError>
pub fn rindex(&self, sub: &str) -> Result<Series, FrameError>
Find the last occurrence of a substring; error if any non-null string misses it.
Matches pd.Series.str.rindex(sub).
Sourcepub fn index_of(&self, sub: &str) -> Result<Series, FrameError>
pub fn index_of(&self, sub: &str) -> Result<Series, FrameError>
Find the first occurrence of a substring; NaN if not found.
Counterpart of pd.Series.str.index(sub). Like find(), but where pandas
raises an error for a missing substring, this returns NaN for not-found.
Per br-frankenpandas-02ae2b: char-based, not byte-based.
Sourcepub fn rindex_of(&self, sub: &str) -> Result<Series, FrameError>
pub fn rindex_of(&self, sub: &str) -> Result<Series, FrameError>
Find the last occurrence of a substring; NaN if not found.
Counterpart of pd.Series.str.rindex(sub). Like rfind(), but where pandas
raises an error for a missing substring, this returns NaN for not-found.
Per br-frankenpandas-02ae2b: char-based, not byte-based.
Sourcepub fn expandtabs(&self, tabsize: usize) -> Result<Series, FrameError>
pub fn expandtabs(&self, tabsize: usize) -> Result<Series, FrameError>
Replace tab characters with spaces.
Matches pd.Series.str.expandtabs(tabsize).
Sourcepub fn removeprefix(&self, prefix: &str) -> Result<Series, FrameError>
pub fn removeprefix(&self, prefix: &str) -> Result<Series, FrameError>
Remove a prefix from each string if present.
Matches pd.Series.str.removeprefix(prefix) (Python 3.9+ / pandas 1.4+).
Sourcepub fn removesuffix(&self, suffix: &str) -> Result<Series, FrameError>
pub fn removesuffix(&self, suffix: &str) -> Result<Series, FrameError>
Remove a suffix from each string if present.
Matches pd.Series.str.removesuffix(suffix) (Python 3.9+ / pandas 1.4+).
Sourcepub fn casefold(&self) -> Result<Series, FrameError>
pub fn casefold(&self) -> Result<Series, FrameError>
Aggressive Unicode case folding.
Matches pd.Series.str.casefold(). Like lower() but more aggressive
for Unicode (e.g., German sharp s).
Sourcepub fn swapcase(&self) -> Result<Series, FrameError>
pub fn swapcase(&self) -> Result<Series, FrameError>
Swap the case of each character.
Matches pd.Series.str.swapcase().
Sourcepub fn partition(
&self,
sep: &str,
) -> Result<(Series, Series, Series), FrameError>
pub fn partition( &self, sep: &str, ) -> Result<(Series, Series, Series), FrameError>
Split each string at the first occurrence of separator.
Matches pd.Series.str.partition(sep). Returns three Series holding the
parts (before, sep, after) of each string, or (original, ‘’, ‘’) if sep
is not found.
Sourcepub fn rpartition(
&self,
sep: &str,
) -> Result<(Series, Series, Series), FrameError>
pub fn rpartition( &self, sep: &str, ) -> Result<(Series, Series, Series), FrameError>
Split each string at the last occurrence of separator.
Matches pd.Series.str.rpartition(sep).
Sourcepub fn partition_df(&self, sep: &str) -> Result<DataFrame, FrameError>
pub fn partition_df(&self, sep: &str) -> Result<DataFrame, FrameError>
Split the string at the first occurrence of sep and return a DataFrame.
Matches pd.Series.str.partition(sep).
Sourcepub fn rpartition_df(&self, sep: &str) -> Result<DataFrame, FrameError>
pub fn rpartition_df(&self, sep: &str) -> Result<DataFrame, FrameError>
Split the string at the last occurrence of sep and return a DataFrame.
Matches pd.Series.str.rpartition(sep).
Sourcepub fn get_dummies(&self, sep: &str) -> Result<DataFrame, FrameError>
pub fn get_dummies(&self, sep: &str) -> Result<DataFrame, FrameError>
Split each string by separator and return a DataFrame of indicator columns.
Matches pd.Series.str.get_dummies(sep). Each unique token becomes
a column with 1/0 indicators.
Sourcepub fn encode(&self, _encoding: &str) -> Result<Series, FrameError>
pub fn encode(&self, _encoding: &str) -> Result<Series, FrameError>
Encode strings to bytes (returns byte length as Float64).
Matches pd.Series.str.encode(encoding). Since Rust strings are
always UTF-8, this returns the byte length of each string for
the “utf-8” encoding. Per br-frankenpandas-rg8ys.6.6: pandas
returns float64 for nullable integers (nulls become NaN).
Sourcepub fn decode(&self, _encoding: &str) -> Result<Series, FrameError>
pub fn decode(&self, _encoding: &str) -> Result<Series, FrameError>
Decode bytes to strings (identity operation in Rust).
Matches pd.Series.str.decode(encoding). Since Rust strings are
always UTF-8, this is an identity operation.
Sourcepub fn translate(&self, from: &str, to: &str) -> Result<Series, FrameError>
pub fn translate(&self, from: &str, to: &str) -> Result<Series, FrameError>
Translate characters using a mapping table.
Matches pd.Series.str.translate(table). Replaces each character
found in from with the corresponding character in to. If to
is shorter than from, excess mapped characters are deleted.
Examples found in repository?
25fn golden() -> String {
26 let mut out = String::new();
27 let s = s_from(vec!["hello world", "abcXYZ", "", "duplicate-d"]);
28
29 // basic 1:1 replacement
30 let r = s.str().translate("lo", "LO").unwrap();
31 out.push_str(&format!("basic={:?}\n", r.values()));
32
33 // `to` shorter than `from` => extra source chars are DELETED. 'o'->'0',
34 // but 'l' (index 1) has no target so every 'l' is removed.
35 let r2 = s.str().translate("ol", "0").unwrap();
36 out.push_str(&format!("delete={:?}\n", r2.values()));
37
38 // duplicate source char in `from`: first occurrence wins ('d'->'1', not '2')
39 let r3 = s.str().translate("dd", "12").unwrap();
40 out.push_str(&format!("dupsrc={:?}\n", r3.values()));
41
42 // empty table: identity
43 let r4 = s.str().translate("", "").unwrap();
44 out.push_str(&format!("empty={:?}\n", r4.values()));
45 out
46}
47
48fn main() {
49 let g = golden();
50 print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
51
52 // Large translation table + many long strings.
53 let from: String = (0u32..2000).filter_map(char::from_u32).collect();
54 let to: String = (1000u32..3000).filter_map(char::from_u32).collect();
55 // Strings made of chars near the END of the `from` table: the linear scan
56 // must traverse ~all of `from` per char (the realistic O(|from|) cost).
57 let base: String = (1800u32..2000).filter_map(char::from_u32).collect();
58 let one = base.repeat(20); // ~4000 chars/string
59 let n = 2_000;
60 let s = s_from(vec![one.as_str(); n]);
61
62 // warmup
63 let _ = s.str().translate(&from, &to).unwrap();
64
65 let t = Instant::now();
66 let r = s.str().translate(&from, &to).unwrap();
67 let d = t.elapsed();
68 assert_eq!(r.len(), n);
69
70 println!(
71 "TIMING n={n} from_len={} translate={:.3}ms",
72 from.chars().count(),
73 d.as_secs_f64() * 1e3
74 );
75}