Struct StringAccessor

Source

pub struct StringAccessor<'a> { /* private fields */ }

Expand description

Created by Series::str(). Provides string manipulation methods analogous to the pandas Series.str namespace.

Implementations§

Source §

impl StringAccessor<'_>

Source

pub fn lower(&self) -> Result<Series, FrameError>

Convert strings to lowercase.

Source

pub fn upper(&self) -> Result<Series, FrameError>

Convert strings to uppercase.

Source

pub fn strip(&self) -> Result<Series, FrameError>

Strip leading and trailing whitespace.

Source

pub fn strip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

Strip leading and trailing characters in to_strip.

Matches pd.Series.str.strip(chars).

Source

pub fn lstrip(&self) -> Result<Series, FrameError>

Strip leading whitespace.

Source

pub fn lstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

Strip leading characters in to_strip.

Matches pd.Series.str.lstrip(chars).

Source

pub fn rstrip(&self) -> Result<Series, FrameError>

Strip trailing whitespace.

Source

pub fn rstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

Strip trailing characters in to_strip.

Matches pd.Series.str.rstrip(chars).

Source

pub fn contains(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string contains a pattern.

Source

pub fn contains_any(&self, pats: &[&str]) -> Result<Series, FrameError>

Check whether each string contains any of the given patterns.

Matches pd.Series.str.contains('|'.join(pats), regex=True), except that every pattern is treated as a literal substring: regex metacharacters in pats are not interpreted.

Examples found in repository ?

examples/bench_contains_any.rs (line 29)

25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["hello world", "foobar", "BAZ qux", "", "a.b+c"]);
28
29    let r = s.str().contains_any(&["world", "qux"]).unwrap();
30    out.push_str(&format!("hit={:?}\n", r.values()));
31
32    // Regex metacharacters must be treated as LITERALS.
33    let r2 = s.str().contains_any(&["a.b+c", "zz"]).unwrap();
34    out.push_str(&format!("literal_meta={:?}\n", r2.values()));
35
36    // Case sensitive (no match for lowercase 'baz').
37    let r3 = s.str().contains_any(&["baz"]).unwrap();
38    out.push_str(&format!("case={:?}\n", r3.values()));
39
40    // Empty pattern set => all false.
41    let r4 = s.str().contains_any(&[]).unwrap();
42    out.push_str(&format!("empty={:?}\n", r4.values()));
43
44    // Empty-string pattern matches every (non-null) string.
45    let r5 = s.str().contains_any(&["", "zz"]).unwrap();
46    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
47    out
48}
49
50fn main() {
51    let g = golden();
52    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
53
54    // Many patterns, mostly-missing, over many medium strings (worst case:
55    // every pattern scanned per string in the naive path).
56    let pats_owned: Vec<String> = (0..200).map(|i| format!("needle{i:04}xyz")).collect();
57    let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
58    let base = "the quick brown fox jumps over the lazy dog ".repeat(4);
59    let n = 20_000;
60    let s = s_from(vec![base.as_str(); n]);
61
62    // warmup
63    let _ = s.str().contains_any(&pats).unwrap();
64
65    let t = Instant::now();
66    let r = s.str().contains_any(&pats).unwrap();
67    let d = t.elapsed();
68    assert_eq!(r.len(), n);
69
70    println!(
71        "TIMING n={n} npats={} contains_any={:.3}ms",
72        pats.len(),
73        d.as_secs_f64() * 1e3
74    );
75}

Source

pub fn contains_with_options( &self, pat: &str, case: bool, na: Option<bool>, regex: bool, ) -> Result<Series, FrameError>

Check whether each string contains pat with case/na/regex options.

Matches pd.Series.str.contains(pat, case=True, na=None, regex=True):

regex=true treats pat as a regular expression; regex=false treats it as a literal substring search.
case=false makes the match case-insensitive.
na=None propagates nulls as NaN; na=Some(bool) replaces null entries with that boolean, matching pandas’ fill-on-NaN behavior.

Source

pub fn replace(&self, pat: &str, repl: &str) -> Result<Series, FrameError>

Replace occurrences of a pattern with a replacement string.

Source

pub fn replace_with_options( &self, pat: &str, repl: &str, n: Option<usize>, case: bool, regex: bool, ) -> Result<Series, FrameError>

Replace with pandas-parity case/regex/n options.

Matches pd.Series.str.replace(pat, repl, n=-1, case=True, regex=True):

regex=true compiles pat as a regex; regex=false treats it as a literal substring.
case=false performs case-insensitive matching. For literal mode this is implemented via case-folded find scanning.
n=None (pandas -1) replaces every occurrence. n=Some(k) caps at the first k replacements per cell.

Source

pub fn startswith(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string starts with a prefix.

Source

pub fn startswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>

Check whether each string starts with any of the given prefixes.

Matches pd.Series.str.startswith((p1, p2, ...)).

Examples found in repository ?

examples/bench_startswith_any.rs (line 29)

25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29    let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30    out.push_str(&format!("sw_hit={:?}\n", r.values()));
31    // metacharacters stay literal
32    let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33    out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34    let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35    out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36    // empty pattern set => all false
37    let r4 = s.str().startswith_any(&[]).unwrap();
38    out.push_str(&format!("empty={:?}\n", r4.values()));
39    // empty-string pattern matches every non-null string
40    let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43    // with_na variants (null fill).
44    let sn = Series::from_values(
45        "s",
46        vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47        vec![
48            Scalar::Utf8("https://z".into()),
49            Scalar::Null(fp_types::NullKind::NaN),
50        ],
51    )
52    .unwrap();
53    let rn = sn
54        .str()
55        .startswith_any_with_na(&["https://"], Some(true))
56        .unwrap();
57    out.push_str(&format!("na_fill={:?}\n", rn.values()));
58    out
59}
60
61fn main() {
62    let g = golden();
63    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
64
65    // Worst case: many prefixes sharing a long common stem.
66    let pats_owned: Vec<String> = (0..200)
67        .map(|i| format!("https://cdn.example.com/path/{i:04}/"))
68        .collect();
69    let pats: Vec<&str> = pats_owned.iter().map(String::as_str).collect();
70    let one = "https://cdn.example.com/path/9999/asset/file/deep/name.bin";
71    let n = 40_000;
72    let s = s_from(vec![one; n]);
73
74    let _ = s.str().startswith_any(&pats).unwrap(); // warmup
75
76    let t = Instant::now();
77    let r = s.str().startswith_any(&pats).unwrap();
78    let d = t.elapsed();
79    assert_eq!(r.len(), n);
80
81    println!(
82        "TIMING n={n} npats={} startswith_any={:.3}ms",
83        pats.len(),
84        d.as_secs_f64() * 1e3
85    );
86}

Source

pub fn startswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>

Check whether each string starts with pat, replacing nulls with na.

Matches pd.Series.str.startswith(pat, na=...). When na is None (Rust None), nulls propagate as NaN — same as the default pandas behavior on 2.2+. When na is Some(true) / Some(false), null entries are replaced with that boolean.

Source

pub fn startswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>

Check whether each string starts with any of pats, replacing nulls with na.

Matches pd.Series.str.startswith((p1, p2, ...), na=...).

Examples found in repository ?

examples/bench_startswith_any.rs (line 55)

25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29    let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30    out.push_str(&format!("sw_hit={:?}\n", r.values()));
31    // metacharacters stay literal
32    let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33    out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34    let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35    out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36    // empty pattern set => all false
37    let r4 = s.str().startswith_any(&[]).unwrap();
38    out.push_str(&format!("empty={:?}\n", r4.values()));
39    // empty-string pattern matches every non-null string
40    let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43    // with_na variants (null fill).
44    let sn = Series::from_values(
45        "s",
46        vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47        vec![
48            Scalar::Utf8("https://z".into()),
49            Scalar::Null(fp_types::NullKind::NaN),
50        ],
51    )
52    .unwrap();
53    let rn = sn
54        .str()
55        .startswith_any_with_na(&["https://"], Some(true))
56        .unwrap();
57    out.push_str(&format!("na_fill={:?}\n", rn.values()));
58    out
59}

Source

pub fn endswith(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string ends with a suffix.

Source

pub fn endswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>

Check whether each string ends with any of the given suffixes.

Matches pd.Series.str.endswith((s1, s2, ...)).

Examples found in repository ?

examples/bench_startswith_any.rs (line 34)

25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["https://a.com", "ftp://x", "file.txt", "", "a.b+c"]);
28
29    let r = s.str().startswith_any(&["https://", "ftp://"]).unwrap();
30    out.push_str(&format!("sw_hit={:?}\n", r.values()));
31    // metacharacters stay literal
32    let r2 = s.str().startswith_any(&["a.b+c"]).unwrap();
33    out.push_str(&format!("sw_meta={:?}\n", r2.values()));
34    let r3 = s.str().endswith_any(&[".txt", ".com"]).unwrap();
35    out.push_str(&format!("ew_hit={:?}\n", r3.values()));
36    // empty pattern set => all false
37    let r4 = s.str().startswith_any(&[]).unwrap();
38    out.push_str(&format!("empty={:?}\n", r4.values()));
39    // empty-string pattern matches every non-null string
40    let r5 = s.str().endswith_any(&["", "zz"]).unwrap();
41    out.push_str(&format!("empty_pat={:?}\n", r5.values()));
42
43    // with_na variants (null fill).
44    let sn = Series::from_values(
45        "s",
46        vec![IndexLabel::Int64(0), IndexLabel::Int64(1)],
47        vec![
48            Scalar::Utf8("https://z".into()),
49            Scalar::Null(fp_types::NullKind::NaN),
50        ],
51    )
52    .unwrap();
53    let rn = sn
54        .str()
55        .startswith_any_with_na(&["https://"], Some(true))
56        .unwrap();
57    out.push_str(&format!("na_fill={:?}\n", rn.values()));
58    out
59}

Source

pub fn endswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>

Check whether each string ends with pat, replacing nulls with na.

Matches pd.Series.str.endswith(pat, na=...).

Source

pub fn endswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>

Check whether each string ends with any of pats, replacing nulls with na.

Matches pd.Series.str.endswith((s1, s2, ...), na=...).

Source

pub fn len(&self) -> Result<Series, FrameError>

Get the length of each string (character count, not byte count).

Per br-frankenpandas-rg8ys.6.6: pandas returns float64 (not int64) to represent nullable integers. Nulls become NaN.

Source

pub fn slice( &self, start: Option<i64>, stop: Option<i64>, step: Option<i64>, ) -> Result<Series, FrameError>

Slice each string from start to stop, with an optional step.

Source

pub fn slice_replace( &self, start: Option<i64>, stop: Option<i64>, repl: &str, ) -> Result<Series, FrameError>

Replace a positional slice of each string.

Matches pd.Series.str.slice_replace(start, stop, repl), including Python-style NEGATIVE start/stop. The replaced span is chars[start..max(start, stop)], so the result is s[:start] + repl + s[max(start, stop):] — stop is never allowed to precede start. Verified vs live pandas 2.2.3: “abcde”.slice_replace(-2, None, “X”) -> “abcX”; .slice_replace(0, -1, “X”) -> “Xe”; .slice_replace(3, 1, “X”) -> “abcXde”.

Source

pub fn split_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

Split each string by a separator and return the n-th element.

Source

pub fn rsplit_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

Split each string from the right and return the n-th element.

Matches pd.Series.str.rsplit(pat).str[n].

Source

pub fn split_df(&self, pat: &str) -> Result<DataFrame, FrameError>

Split each string by a separator and return a DataFrame.

Matches pd.Series.str.split(pat, expand=True).

Source

pub fn split_df_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

Split each string with a maximum number of splits, returning a DataFrame.

Matches pd.Series.str.split(pat, n=..., expand=True). n=None means split on every occurrence (default behavior); n=Some(k) caps the result at k + 1 parts by doing at most k splits.

Source

pub fn rsplit_df( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

Split each string from the right and return a DataFrame.

Matches pd.Series.str.rsplit(pat, n=..., expand=True).

Source

pub fn split_count(&self, pat: &str) -> Result<Series, FrameError>

Count the number of parts when splitting by pattern.

Matches pd.Series.str.split(pat).str.len(). Returns Int64 count.

Source

pub fn join(&self, from: &str, sep: &str) -> Result<Series, FrameError>

Join/concatenate each string element with a separator.

Applies to each element: useful after conceptual split operations. Replaces occurrences of from with sep.

Source

pub fn capitalize(&self) -> Result<Series, FrameError>

Capitalize the first character and lowercase the rest (pandas behavior).

Source

pub fn title(&self) -> Result<Series, FrameError>

Title case each string.

Source

pub fn repeat(&self, n: usize) -> Result<Series, FrameError>

Repeat each string n times.

Source

pub fn pad( &self, width: usize, side: &str, fillchar: char, ) -> Result<Series, FrameError>

Pad strings to a minimum width with a fill character.

Source

pub fn contains_regex(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string matches a regex pattern.

Analogous to pandas.Series.str.contains(pat, regex=True).

Source

pub fn replace_regex(&self, pat: &str, repl: &str) -> Result<Series, FrameError>

Replace first occurrence of a regex pattern with a replacement string.

Analogous to pandas.Series.str.replace(pat, repl, n=1, regex=True), i.e. only the first match in each string is replaced. The replacement string supports backreferences ($1, $2, etc.).

Source

pub fn replace_regex_all( &self, pat: &str, repl: &str, ) -> Result<Series, FrameError>

Replace all occurrences of a regex pattern with a replacement string.

Analogous to pandas.Series.str.replace(pat, repl, regex=True, n=-1).

Source

pub fn extract(&self, pat: &str) -> Result<Series, FrameError>

Extract the first match of a regex capture group.

Analogous to pandas.Series.str.extract(pat, expand=False) for a single-group pattern. Returns the first capture group (group 1) if the pattern contains a group, otherwise returns the full match (group 0). Non-matching strings produce Null. If the pattern’s single capture group is named, the group name is used as the Series name (matching pandas 2.2 behavior).

Source

pub fn extract_df(&self, pat: &str) -> Result<DataFrame, FrameError>

Extract multiple capture groups as a DataFrame.

Matches pd.Series.str.extract(pat) when pat has multiple groups.

Source

pub fn count(&self, pat: &str) -> Result<Series, FrameError>

Count occurrences of pattern in each string.

Matches pd.Series.str.count(pat).

Source

pub fn extract_to_frame(&self, pat: &str) -> Result<DataFrame, FrameError>

Extract capture groups from a regex, returning a DataFrame.

Analogous to pandas.Series.str.extract(pat, expand=True). Returns a DataFrame where each column corresponds to a capture group. If the pattern has named groups (?P<name>...), those names become column names; otherwise columns are numbered “0”, “1”, etc. Non-matching rows produce NaN in every group column.

Source

pub fn extractall(&self, pat: &str) -> Result<DataFrame, FrameError>

Extract all matches of a regex capture group, returning a DataFrame.

Analogous to pandas.Series.str.extractall(pat). Returns a DataFrame where each row is a match, with a composite index “original_idx, match_n”. If the pattern contains capture groups, returns one column per group. Named capture groups ((?P<name>...) / (?<name>...)) produce columns with that name; unnamed groups use positional names 0, 1, ....

Source

pub fn split_expand(&self, pat: &str) -> Result<DataFrame, FrameError>

Split strings by pattern, expanding into a DataFrame.

Analogous to pandas.Series.str.split(pat, expand=True). Returns a DataFrame where each column is a split part. Shorter splits are padded with NaN.

Source

pub fn split_expand_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

Split strings by pattern with an optional split limit, expanding into a DataFrame.

Analogous to pandas.Series.str.split(pat, n=..., expand=True). Shorter splits are padded with NaN.

Source

pub fn count_matches(&self, pat: &str) -> Result<Series, FrameError>

Count non-overlapping matches of a regex pattern in each string.

Analogous to pandas.Series.str.count(pat).

Source

pub fn count_literal(&self, pat: &str) -> Result<Series, FrameError>

Count non-overlapping occurrences of a literal substring.

Matches pd.Series.str.count(pat) for literal patterns.

Source

pub fn findall(&self, pat: &str, sep: &str) -> Result<Series, FrameError>

Find all non-overlapping matches and return them joined by a separator.

Since Series cannot hold list values, matches are joined with sep. Non-matching strings produce Null.

Source

pub fn fullmatch(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string fully matches a regex pattern.

Unlike contains_regex which searches for a match anywhere, this requires the entire string to match (anchored ^...$).

Source

pub fn fullmatch_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>

Fullmatch with case/na option parity.

Matches pd.Series.str.fullmatch(pat, case=True, na=None). case=false enables inline (?i) matching. na=None propagates nulls as NaN; na=Some(bool) replaces null entries with the chosen boolean.

Source

pub fn match_regex(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string matches a regex pattern at the start.

Analogous to pandas.Series.str.match(pat) which uses Python’s re.match() (anchored at the beginning of the string).

Source

pub fn match(&self, pat: &str) -> Result<Series, FrameError>

Check whether each string matches a regex pattern at the start.

Rust raw-identifier spelling (r#match) for pandas.Series.str.match(pat); match is a reserved keyword in Rust, so the method is called as r#match.

Source

pub fn match_regex_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>

Match (start-anchored) with case/na option parity.

Matches pd.Series.str.match(pat, case=True, na=None) — Python’s re.match semantics (anchored at start, not end). case=false enables inline (?i) matching; na handling mirrors fullmatch_with_options.

Source

pub fn split_regex_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

Split each string by a regex pattern and return the n-th element.

Analogous to pandas.Series.str.split(pat, regex=True).str[n].

Source

pub fn zfill(&self, width: usize) -> Result<Series, FrameError>

Zero-fill strings to specified width.

Matches pd.Series.str.zfill(width).

Source

pub fn center(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

Center-align strings within specified width.

Matches pd.Series.str.center(width, fillchar).

Source

pub fn ljust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

Left-align strings within specified width (pad on right).

Matches pd.Series.str.ljust(width, fillchar).

Source

pub fn rjust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

Right-align strings within specified width (pad on left).

Matches pd.Series.str.rjust(width, fillchar).

Source

pub fn isdigit(&self) -> Result<Series, FrameError>

Check if each string is composed of digits only.

Matches pd.Series.str.isdigit().

Source

pub fn isalpha(&self) -> Result<Series, FrameError>

Check if each string is composed of alphabetic characters only.

Matches pd.Series.str.isalpha().

Source

pub fn isalnum(&self) -> Result<Series, FrameError>

Check if each string is alphanumeric.

Matches pd.Series.str.isalnum().

Source

pub fn isascii(&self) -> Result<Series, FrameError>

Check if each string is composed of ASCII characters only.

Matches pd.Series.str.isascii().

Source

pub fn isspace(&self) -> Result<Series, FrameError>

Check if each string is composed of whitespace only.

Matches pd.Series.str.isspace().

Source

pub fn islower(&self) -> Result<Series, FrameError>

Check if each string is lowercase.

Matches pd.Series.str.islower().

Source

pub fn isupper(&self) -> Result<Series, FrameError>

Check if each string is uppercase.

Matches pd.Series.str.isupper().

Source

pub fn isnumeric(&self) -> Result<Series, FrameError>

Check if each string is numeric (including Unicode numeric chars).

Matches pd.Series.str.isnumeric().

Source

pub fn get(&self, i: i64) -> Result<Series, FrameError>

Extract character at position from each string.

Matches pd.Series.str.get(i). Returns NaN if index is out of bounds.

Source

pub fn wrap(&self, width: usize) -> Result<Series, FrameError>

Wrap long lines at specified width.

Matches pd.Series.str.wrap(width). Inserts newlines to wrap text.

Source

pub fn wrap_with_drop_whitespace( &self, width: usize, drop_whitespace: bool, ) -> Result<Series, FrameError>

Wrap long lines while controlling whether boundary whitespace is dropped.

Matches pd.Series.str.wrap(width, drop_whitespace=...) for the supported strict-mode behavior covered by the conformance fixtures.

Source

pub fn normalize(&self, form: &str) -> Result<Series, FrameError>

Normalize Unicode strings.

Matches pd.Series.str.normalize(form) for NFC, NFKC, NFD, and NFKD.

Source

pub fn isdecimal(&self) -> Result<Series, FrameError>

Check if each string is composed of decimal characters only.

Matches pd.Series.str.isdecimal().

Source

pub fn istitle(&self) -> Result<Series, FrameError>

Check if each string is titlecased.

Matches pd.Series.str.istitle().

Source

pub fn cat(&self, sep: &str) -> Result<String, FrameError>

Concatenate all strings in the Series into a single string.

Matches pd.Series.str.cat() - concatenate strings. Concatenates all strings in the Series with a separator.

Source

pub fn cat_series( &self, others: &Series, sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>

Concatenate strings element-wise with another Series.

Matches pd.Series.str.cat(others, sep, na_rep). na_rep is used to replace Null values in either Series.

Source

pub fn cat_list( &self, others: &[&Series], sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>

Concatenate strings element-wise with multiple other Series.

Matches pd.Series.str.cat([s1, s2, ...], sep, na_rep).

Source

pub fn find(&self, sub: &str) -> Result<Series, FrameError>

Find the first occurrence of a substring.

Matches pd.Series.str.find(sub). Returns -1 if not found. Per br-frankenpandas-02ae2b: pandas returns CHAR-based positions (Python 3 strings are char-indexed), not byte positions.

Source

pub fn rfind(&self, sub: &str) -> Result<Series, FrameError>

Find the last occurrence of a substring.

Matches pd.Series.str.rfind(sub). Returns -1 if not found. Per br-frankenpandas-02ae2b: char-based, not byte-based.

Source

pub fn index(&self, sub: &str) -> Result<Series, FrameError>

Find the first occurrence of a substring; error if any non-null string misses it.

Matches pd.Series.str.index(sub).

Source

pub fn rindex(&self, sub: &str) -> Result<Series, FrameError>

Find the last occurrence of a substring; error if any non-null string misses it.

Matches pd.Series.str.rindex(sub).

Source

pub fn index_of(&self, sub: &str) -> Result<Series, FrameError>

Find the first occurrence of a substring; NaN if not found.

Matches pd.Series.str.index(sub), with one difference: where pandas raises an error when the substring is missing, this method returns NaN for not-found entries. Otherwise like find(). Per br-frankenpandas-02ae2b: char-based, not byte-based.

Source

pub fn rindex_of(&self, sub: &str) -> Result<Series, FrameError>

Find the last occurrence of a substring; NaN if not found.

Matches pd.Series.str.rindex(sub), with one difference: where pandas raises an error when the substring is missing, this method returns NaN for not-found entries. Otherwise like rfind(). Per br-frankenpandas-02ae2b: char-based, not byte-based.

Source

pub fn expandtabs(&self, tabsize: usize) -> Result<Series, FrameError>

Replace tab characters with spaces.

Matches pd.Series.str.expandtabs(tabsize).

Source

pub fn removeprefix(&self, prefix: &str) -> Result<Series, FrameError>

Remove a prefix from each string if present.

Matches pd.Series.str.removeprefix(prefix) (Python 3.9+ / pandas 1.4+).

Source

pub fn removesuffix(&self, suffix: &str) -> Result<Series, FrameError>

Remove a suffix from each string if present.

Matches pd.Series.str.removesuffix(suffix) (Python 3.9+ / pandas 1.4+).

Source

pub fn casefold(&self) -> Result<Series, FrameError>

Aggressive Unicode case folding.

Matches pd.Series.str.casefold(). Like lower() but more aggressive for Unicode (e.g., German sharp s).

Source

pub fn swapcase(&self) -> Result<Series, FrameError>

Swap the case of each character.

Matches pd.Series.str.swapcase().

Source

pub fn partition( &self, sep: &str, ) -> Result<(Series, Series, Series), FrameError>

Split each string at the first occurrence of separator.

Matches pd.Series.str.partition(sep). Returns three Series holding, for each string, the parts (before, sep, after), or (original, ‘’, ‘’) if sep is not found.

Source

pub fn rpartition( &self, sep: &str, ) -> Result<(Series, Series, Series), FrameError>

Split each string at the last occurrence of separator.

Matches pd.Series.str.rpartition(sep).

Source

pub fn partition_df(&self, sep: &str) -> Result<DataFrame, FrameError>

Split the string at the first occurrence of sep and return a DataFrame.

Matches pd.Series.str.partition(sep).

Source

pub fn rpartition_df(&self, sep: &str) -> Result<DataFrame, FrameError>

Split the string at the last occurrence of sep and return a DataFrame.

Matches pd.Series.str.rpartition(sep).

Source

pub fn get_dummies(&self, sep: &str) -> Result<DataFrame, FrameError>

Split each string by separator and return a DataFrame of indicator columns.

Matches pd.Series.str.get_dummies(sep). Each unique token becomes a column with 1/0 indicators.

Source

pub fn encode(&self, _encoding: &str) -> Result<Series, FrameError>

Encode strings to bytes (returns byte length as Float64).

Matches pd.Series.str.encode(encoding). Since Rust strings are always UTF-8, this returns the byte length of each string for the “utf-8” encoding. Per br-frankenpandas-rg8ys.6.6: pandas returns float64 for nullable integers (nulls become NaN).

Source

pub fn decode(&self, _encoding: &str) -> Result<Series, FrameError>

Decode bytes to strings (identity operation in Rust).

Matches pd.Series.str.decode(encoding). Since Rust strings are always UTF-8, this is an identity operation.

Source

pub fn translate(&self, from: &str, to: &str) -> Result<Series, FrameError>

Translate characters using a mapping table.

Matches pd.Series.str.translate(table). Replaces each character found in from with the corresponding character in to. If to is shorter than from, excess mapped characters are deleted.

Examples found in repository ?

examples/bench_translate.rs (line 30)

25fn golden() -> String {
26    let mut out = String::new();
27    let s = s_from(vec!["hello world", "abcXYZ", "", "duplicate-d"]);
28
29    // basic 1:1 replacement
30    let r = s.str().translate("lo", "LO").unwrap();
31    out.push_str(&format!("basic={:?}\n", r.values()));
32
33    // `to` shorter than `from` => extra source chars are DELETED. 'o'->'0',
34    // but 'l' (index 1) has no target so every 'l' is removed.
35    let r2 = s.str().translate("ol", "0").unwrap();
36    out.push_str(&format!("delete={:?}\n", r2.values()));
37
38    // duplicate source char in `from`: first occurrence wins ('d'->'1', not '2')
39    let r3 = s.str().translate("dd", "12").unwrap();
40    out.push_str(&format!("dupsrc={:?}\n", r3.values()));
41
42    // empty table: identity
43    let r4 = s.str().translate("", "").unwrap();
44    out.push_str(&format!("empty={:?}\n", r4.values()));
45    out
46}
47
48fn main() {
49    let g = golden();
50    print!("GOLDEN_BEGIN\n{g}GOLDEN_END\n");
51
52    // Large translation table + many long strings.
53    let from: String = (0u32..2000).filter_map(char::from_u32).collect();
54    let to: String = (1000u32..3000).filter_map(char::from_u32).collect();
55    // Strings made of chars near the END of the `from` table: the linear scan
56    // must traverse ~all of `from` per char (the realistic O(|from|) cost).
57    let base: String = (1800u32..2000).filter_map(char::from_u32).collect();
58    let one = base.repeat(20); // ~4000 chars/string
59    let n = 2_000;
60    let s = s_from(vec![one.as_str(); n]);
61
62    // warmup
63    let _ = s.str().translate(&from, &to).unwrap();
64
65    let t = Instant::now();
66    let r = s.str().translate(&from, &to).unwrap();
67    let d = t.elapsed();
68    assert_eq!(r.len(), n);
69
70    println!(
71        "TIMING n={n} from_len={} translate={:.3}ms",
72        from.chars().count(),
73        d.as_secs_f64() * 1e3
74    );
75}

Auto Trait Implementations§

§

impl<'a> UnwindSafe for StringAccessor<'a>

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct StringAccessor Copy item path

Implementations§

impl StringAccessor<'_>

pub fn lower(&self) -> Result<Series, FrameError>

pub fn upper(&self) -> Result<Series, FrameError>

pub fn strip(&self) -> Result<Series, FrameError>

pub fn strip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

pub fn lstrip(&self) -> Result<Series, FrameError>

pub fn lstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

pub fn rstrip(&self) -> Result<Series, FrameError>

pub fn rstrip_chars(&self, to_strip: &str) -> Result<Series, FrameError>

pub fn contains(&self, pat: &str) -> Result<Series, FrameError>

pub fn contains_any(&self, pats: &[&str]) -> Result<Series, FrameError>

pub fn contains_with_options( &self, pat: &str, case: bool, na: Option<bool>, regex: bool, ) -> Result<Series, FrameError>

pub fn replace(&self, pat: &str, repl: &str) -> Result<Series, FrameError>

pub fn replace_with_options( &self, pat: &str, repl: &str, n: Option<usize>, case: bool, regex: bool, ) -> Result<Series, FrameError>

pub fn startswith(&self, pat: &str) -> Result<Series, FrameError>

pub fn startswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>

pub fn startswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>

pub fn startswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>

pub fn endswith(&self, pat: &str) -> Result<Series, FrameError>

pub fn endswith_any(&self, pats: &[&str]) -> Result<Series, FrameError>

pub fn endswith_with_na( &self, pat: &str, na: Option<bool>, ) -> Result<Series, FrameError>

pub fn endswith_any_with_na( &self, pats: &[&str], na: Option<bool>, ) -> Result<Series, FrameError>

pub fn len(&self) -> Result<Series, FrameError>

pub fn slice( &self, start: Option<i64>, stop: Option<i64>, step: Option<i64>, ) -> Result<Series, FrameError>

pub fn slice_replace( &self, start: Option<i64>, stop: Option<i64>, repl: &str, ) -> Result<Series, FrameError>

pub fn split_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

pub fn rsplit_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

pub fn split_df(&self, pat: &str) -> Result<DataFrame, FrameError>

pub fn split_df_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

pub fn rsplit_df( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

pub fn split_count(&self, pat: &str) -> Result<Series, FrameError>

pub fn join(&self, from: &str, sep: &str) -> Result<Series, FrameError>

pub fn capitalize(&self) -> Result<Series, FrameError>

pub fn title(&self) -> Result<Series, FrameError>

pub fn repeat(&self, n: usize) -> Result<Series, FrameError>

pub fn pad( &self, width: usize, side: &str, fillchar: char, ) -> Result<Series, FrameError>

pub fn contains_regex(&self, pat: &str) -> Result<Series, FrameError>

pub fn replace_regex(&self, pat: &str, repl: &str) -> Result<Series, FrameError>

pub fn replace_regex_all( &self, pat: &str, repl: &str, ) -> Result<Series, FrameError>

pub fn extract(&self, pat: &str) -> Result<Series, FrameError>

pub fn extract_df(&self, pat: &str) -> Result<DataFrame, FrameError>

pub fn count(&self, pat: &str) -> Result<Series, FrameError>

pub fn extract_to_frame(&self, pat: &str) -> Result<DataFrame, FrameError>

pub fn extractall(&self, pat: &str) -> Result<DataFrame, FrameError>

pub fn split_expand(&self, pat: &str) -> Result<DataFrame, FrameError>

pub fn split_expand_n( &self, pat: &str, n: Option<usize>, ) -> Result<DataFrame, FrameError>

pub fn count_matches(&self, pat: &str) -> Result<Series, FrameError>

pub fn count_literal(&self, pat: &str) -> Result<Series, FrameError>

pub fn findall(&self, pat: &str, sep: &str) -> Result<Series, FrameError>

pub fn fullmatch(&self, pat: &str) -> Result<Series, FrameError>

pub fn fullmatch_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>

pub fn match_regex(&self, pat: &str) -> Result<Series, FrameError>

pub fn r#match(&self, pat: &str) -> Result<Series, FrameError>

pub fn match_regex_with_options( &self, pat: &str, case: bool, na: Option<bool>, ) -> Result<Series, FrameError>

pub fn split_regex_get(&self, pat: &str, n: i64) -> Result<Series, FrameError>

pub fn zfill(&self, width: usize) -> Result<Series, FrameError>

pub fn center(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

pub fn ljust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

pub fn rjust(&self, width: usize, fillchar: char) -> Result<Series, FrameError>

pub fn isdigit(&self) -> Result<Series, FrameError>

pub fn isalpha(&self) -> Result<Series, FrameError>

pub fn isalnum(&self) -> Result<Series, FrameError>

pub fn isascii(&self) -> Result<Series, FrameError>

pub fn isspace(&self) -> Result<Series, FrameError>

pub fn islower(&self) -> Result<Series, FrameError>

pub fn isupper(&self) -> Result<Series, FrameError>

pub fn isnumeric(&self) -> Result<Series, FrameError>

pub fn get(&self, i: i64) -> Result<Series, FrameError>

pub fn wrap(&self, width: usize) -> Result<Series, FrameError>

pub fn wrap_with_drop_whitespace( &self, width: usize, drop_whitespace: bool, ) -> Result<Series, FrameError>

pub fn normalize(&self, form: &str) -> Result<Series, FrameError>

pub fn isdecimal(&self) -> Result<Series, FrameError>

pub fn istitle(&self) -> Result<Series, FrameError>

pub fn cat(&self, sep: &str) -> Result<String, FrameError>

pub fn cat_series( &self, others: &Series, sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>

pub fn cat_list( &self, others: &[&Series], sep: &str, na_rep: Option<&str>, ) -> Result<Series, FrameError>

pub fn find(&self, sub: &str) -> Result<Series, FrameError>

pub fn rfind(&self, sub: &str) -> Result<Series, FrameError>

Struct StringAccessor

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,