typst_library/foundations/
str.rs

1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::{Span, Spanned};
10use unicode_segmentation::UnicodeSegmentation;
11
12use crate::diag::{bail, At, SourceResult, StrResult};
13use crate::engine::Engine;
14use crate::foundations::{
15    cast, dict, func, repr, scope, ty, Array, Bytes, Context, Decimal, Dict, Func,
16    IntoValue, Label, Repr, Type, Value, Version,
17};
18use crate::layout::Alignment;
19
/// Create a new [`Str`] from a format string.
///
/// All tokens are forwarded unchanged to `eco_format!`, and the formatted
/// result is converted with `Str::from`.
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($tts:tt)*) => {{
        $crate::foundations::Str::from($crate::foundations::eco_format!($($tts)*))
    }};
}
28
29#[doc(hidden)]
30pub use ecow::eco_format;
31
32#[doc(inline)]
33pub use crate::__format_str as format_str;
34
/// A sequence of Unicode codepoints.
///
/// You can iterate over the grapheme clusters of the string using a [for
/// loop]($scripting/#loops). Grapheme clusters are basically characters but
/// keep together things that belong together, e.g. multiple codepoints that
/// together form a flag emoji. Strings can be added with the `+` operator,
/// [joined together]($scripting/#blocks) and multiplied with integers.
///
/// Typst provides utility methods for string manipulation. Many of these
/// methods (e.g., `split`, `trim` and `replace`) operate on _patterns:_ A
/// pattern can be either a string or a [regular expression]($regex). This makes
/// the methods quite versatile.
///
/// All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
/// zero-based and negative indices wrap around to the end of the string. An
/// index that falls inside a multi-byte codepoint is rejected with an error.
///
/// You can convert a value to a string with this type's constructor.
///
/// # Example
/// ```example
/// #"hello world!" \
/// #"\"hello\n  world\"!" \
/// #"1 2 3".split() \
/// #"1,2;3".split(regex("[,;]")) \
/// #(regex("\d+") in "ten euros") \
/// #(regex("\d+") in "10 euros")
/// ```
///
/// # Escape sequences { #escapes }
/// Just like in markup, you can escape a few symbols in strings:
/// - `[\\]` for a backslash
/// - `[\"]` for a quote
/// - `[\n]` for a newline
/// - `[\r]` for a carriage return
/// - `[\t]` for a tab
/// - `[\u{1f600}]` for a hexadecimal Unicode escape sequence
#[ty(scope, cast, title = "String")]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(Serialize, Deserialize)]
// Serialized and deserialized transparently as the wrapped `EcoString`.
#[serde(transparent)]
pub struct Str(EcoString);
76
77impl Str {
78    /// Create a new, empty string.
79    pub fn new() -> Self {
80        Self(EcoString::new())
81    }
82
83    /// Return `true` if the length is 0.
84    pub fn is_empty(&self) -> bool {
85        self.0.is_empty()
86    }
87
88    /// Repeat the string a number of times.
89    pub fn repeat(&self, n: usize) -> StrResult<Self> {
90        if self.0.len().checked_mul(n).is_none() {
91            return Err(eco_format!("cannot repeat this string {n} times"));
92        }
93        Ok(Self(self.0.repeat(n)))
94    }
95
96    /// A string slice containing the entire string.
97    pub fn as_str(&self) -> &str {
98        self
99    }
100
101    /// Resolve an index or throw an out of bounds error.
102    fn locate(&self, index: i64) -> StrResult<usize> {
103        self.locate_opt(index)?
104            .ok_or_else(|| out_of_bounds(index, self.len()))
105    }
106
107    /// Resolve an index, if it is within bounds and on a valid char boundary.
108    ///
109    /// `index == len` is considered in bounds.
110    fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
111        let wrapped =
112            if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
113
114        let resolved = wrapped
115            .and_then(|v| usize::try_from(v).ok())
116            .filter(|&v| v <= self.0.len());
117
118        if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
119            return Err(not_a_char_boundary(index));
120        }
121
122        Ok(resolved)
123    }
124}
125
126#[scope]
127impl Str {
128    /// Converts a value to a string.
129    ///
130    /// - Integers are formatted in base 10. This can be overridden with the
131    ///   optional `base` parameter.
132    /// - Floats are formatted in base 10 and never in exponential notation.
133    /// - Negative integers and floats are formatted with the Unicode minus sign
134    ///   ("−" U+2212) instead of the ASCII minus sign ("-" U+002D).
135    /// - From labels the name is extracted.
136    /// - Bytes are decoded as UTF-8.
137    ///
138    /// If you wish to convert from and to Unicode code points, see the
139    /// [`to-unicode`]($str.to-unicode) and [`from-unicode`]($str.from-unicode)
140    /// functions.
141    ///
142    /// ```example
143    /// #str(10) \
144    /// #str(4000, base: 16) \
145    /// #str(2.7) \
146    /// #str(1e8) \
147    /// #str(<intro>)
148    /// ```
149    #[func(constructor)]
150    pub fn construct(
151        /// The value that should be converted to a string.
152        value: ToStr,
153        /// The base (radix) to display integers in, between 2 and 36.
154        #[named]
155        #[default(Spanned::new(10, Span::detached()))]
156        base: Spanned<i64>,
157    ) -> SourceResult<Str> {
158        Ok(match value {
159            ToStr::Str(s) => {
160                if base.v != 10 {
161                    bail!(base.span, "base is only supported for integers");
162                }
163                s
164            }
165            ToStr::Int(n) => {
166                if base.v < 2 || base.v > 36 {
167                    bail!(base.span, "base must be between 2 and 36");
168                }
169                repr::format_int_with_base(n, base.v).into()
170            }
171        })
172    }
173
174    /// The length of the string in UTF-8 encoded bytes.
175    #[func(title = "Length")]
176    pub fn len(&self) -> usize {
177        self.0.len()
178    }
179
180    /// Extracts the first grapheme cluster of the string.
181    /// Fails with an error if the string is empty.
182    #[func]
183    pub fn first(&self) -> StrResult<Str> {
184        self.0
185            .graphemes(true)
186            .next()
187            .map(Into::into)
188            .ok_or_else(string_is_empty)
189    }
190
191    /// Extracts the last grapheme cluster of the string.
192    /// Fails with an error if the string is empty.
193    #[func]
194    pub fn last(&self) -> StrResult<Str> {
195        self.0
196            .graphemes(true)
197            .next_back()
198            .map(Into::into)
199            .ok_or_else(string_is_empty)
200    }
201
202    /// Extracts the first grapheme cluster after the specified index. Returns
203    /// the default value if the index is out of bounds or fails with an error
204    /// if no default value was specified.
205    #[func]
206    pub fn at(
207        &self,
208        /// The byte index. If negative, indexes from the back.
209        index: i64,
210        /// A default value to return if the index is out of bounds.
211        #[named]
212        default: Option<Value>,
213    ) -> StrResult<Value> {
214        let len = self.len();
215        self.locate_opt(index)?
216            .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
217            .or(default)
218            .ok_or_else(|| no_default_and_out_of_bounds(index, len))
219    }
220
221    /// Extracts a substring of the string.
222    /// Fails with an error if the start or end index is out of bounds.
223    #[func]
224    pub fn slice(
225        &self,
226        /// The start byte index (inclusive). If negative, indexes from the
227        /// back.
228        start: i64,
229        /// The end byte index (exclusive). If omitted, the whole slice until
230        /// the end of the string is extracted. If negative, indexes from the
231        /// back.
232        #[default]
233        end: Option<i64>,
234        /// The number of bytes to extract. This is equivalent to passing
235        /// `start + count` as the `end` position. Mutually exclusive with `end`.
236        #[named]
237        count: Option<i64>,
238    ) -> StrResult<Str> {
239        let end = end.or(count.map(|c| start + c)).unwrap_or(self.len() as i64);
240        let start = self.locate(start)?;
241        let end = self.locate(end)?.max(start);
242        Ok(self.0[start..end].into())
243    }
244
245    /// Returns the grapheme clusters of the string as an array of substrings.
246    #[func]
247    pub fn clusters(&self) -> Array {
248        self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
249    }
250
251    /// Returns the Unicode codepoints of the string as an array of substrings.
252    #[func]
253    pub fn codepoints(&self) -> Array {
254        self.chars().map(|c| Value::Str(c.into())).collect()
255    }
256
257    /// Converts a character into its corresponding code point.
258    ///
259    /// ```example
260    /// #"a".to-unicode() \
261    /// #("a\u{0300}"
262    ///    .codepoints()
263    ///    .map(str.to-unicode))
264    /// ```
265    #[func]
266    pub fn to_unicode(
267        /// The character that should be converted.
268        character: char,
269    ) -> u32 {
270        character as u32
271    }
272
273    /// Converts a unicode code point into its corresponding string.
274    ///
275    /// ```example
276    /// #str.from-unicode(97)
277    /// ```
278    #[func]
279    pub fn from_unicode(
280        /// The code point that should be converted.
281        value: u32,
282    ) -> StrResult<Str> {
283        let c: char = value
284            .try_into()
285            .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
286        Ok(c.into())
287    }
288
289    /// Whether the string contains the specified pattern.
290    ///
291    /// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
292    /// instead of `{"abcd".contains("bc")}`.
293    #[func]
294    pub fn contains(
295        &self,
296        /// The pattern to search for.
297        pattern: StrPattern,
298    ) -> bool {
299        match pattern {
300            StrPattern::Str(pat) => self.0.contains(pat.as_str()),
301            StrPattern::Regex(re) => re.is_match(self),
302        }
303    }
304
305    /// Whether the string starts with the specified pattern.
306    #[func]
307    pub fn starts_with(
308        &self,
309        /// The pattern the string might start with.
310        pattern: StrPattern,
311    ) -> bool {
312        match pattern {
313            StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
314            StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
315        }
316    }
317
318    /// Whether the string ends with the specified pattern.
319    #[func]
320    pub fn ends_with(
321        &self,
322        /// The pattern the string might end with.
323        pattern: StrPattern,
324    ) -> bool {
325        match pattern {
326            StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
327            StrPattern::Regex(re) => {
328                let mut start_byte = 0;
329                while let Some(mat) = re.find_at(self, start_byte) {
330                    if mat.end() == self.0.len() {
331                        return true;
332                    }
333
334                    // There might still be a match overlapping this one, so
335                    // restart at the next code point.
336                    let Some(c) = self[mat.start()..].chars().next() else { break };
337                    start_byte = mat.start() + c.len_utf8();
338                }
339                false
340            }
341        }
342    }
343
344    /// Searches for the specified pattern in the string and returns the first
345    /// match as a string or `{none}` if there is no match.
346    #[func]
347    pub fn find(
348        &self,
349        /// The pattern to search for.
350        pattern: StrPattern,
351    ) -> Option<Str> {
352        match pattern {
353            StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
354            StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
355        }
356    }
357
358    /// Searches for the specified pattern in the string and returns the index
359    /// of the first match as an integer or `{none}` if there is no match.
360    #[func]
361    pub fn position(
362        &self,
363        /// The pattern to search for.
364        pattern: StrPattern,
365    ) -> Option<usize> {
366        match pattern {
367            StrPattern::Str(pat) => self.0.find(pat.as_str()),
368            StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
369        }
370    }
371
372    /// Searches for the specified pattern in the string and returns a
373    /// dictionary with details about the first match or `{none}` if there is no
374    /// match.
375    ///
376    /// The returned dictionary has the following keys:
377    /// - `start`: The start offset of the match
378    /// - `end`: The end offset of the match
379    /// - `text`: The text that matched.
380    /// - `captures`: An array containing a string for each matched capturing
381    ///   group. The first item of the array contains the first matched
382    ///   capturing, not the whole match! This is empty unless the `pattern` was
383    ///   a regex with capturing groups.
384    #[func]
385    pub fn match_(
386        &self,
387        /// The pattern to search for.
388        pattern: StrPattern,
389    ) -> Option<Dict> {
390        match pattern {
391            StrPattern::Str(pat) => {
392                self.0.match_indices(pat.as_str()).next().map(match_to_dict)
393            }
394            StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
395        }
396    }
397
398    /// Searches for the specified pattern in the string and returns an array of
399    /// dictionaries with details about all matches. For details about the
400    /// returned dictionaries, see above.
401    #[func]
402    pub fn matches(
403        &self,
404        /// The pattern to search for.
405        pattern: StrPattern,
406    ) -> Array {
407        match pattern {
408            StrPattern::Str(pat) => self
409                .0
410                .match_indices(pat.as_str())
411                .map(match_to_dict)
412                .map(Value::Dict)
413                .collect(),
414            StrPattern::Regex(re) => re
415                .captures_iter(self)
416                .map(captures_to_dict)
417                .map(Value::Dict)
418                .collect(),
419        }
420    }
421
422    /// Replace at most `count` occurrences of the given pattern with a
423    /// replacement string or function (beginning from the start). If no count
424    /// is given, all occurrences are replaced.
425    #[func]
426    pub fn replace(
427        &self,
428        engine: &mut Engine,
429        context: Tracked<Context>,
430        /// The pattern to search for.
431        pattern: StrPattern,
432        /// The string to replace the matches with or a function that gets a
433        /// dictionary for each match and can return individual replacement
434        /// strings.
435        replacement: Replacement,
436        ///  If given, only the first `count` matches of the pattern are placed.
437        #[named]
438        count: Option<usize>,
439    ) -> SourceResult<Str> {
440        // Heuristic: Assume the new string is about the same length as
441        // the current string.
442        let mut output = EcoString::with_capacity(self.as_str().len());
443
444        // Replace one match of a pattern with the replacement.
445        let mut last_match = 0;
446        let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
447            // Push everything until the match.
448            output.push_str(&self[last_match..range.start]);
449            last_match = range.end;
450
451            // Determine and push the replacement.
452            match &replacement {
453                Replacement::Str(s) => output.push_str(s),
454                Replacement::Func(func) => {
455                    let piece = func
456                        .call(engine, context, [dict])?
457                        .cast::<Str>()
458                        .at(func.span())?;
459                    output.push_str(&piece);
460                }
461            }
462
463            Ok(())
464        };
465
466        // Iterate over the matches of the `pattern`.
467        let count = count.unwrap_or(usize::MAX);
468        match &pattern {
469            StrPattern::Str(pat) => {
470                for m in self.match_indices(pat.as_str()).take(count) {
471                    let (start, text) = m;
472                    handle_match(start..start + text.len(), match_to_dict(m))?;
473                }
474            }
475            StrPattern::Regex(re) => {
476                for caps in re.captures_iter(self).take(count) {
477                    // Extract the entire match over all capture groups.
478                    let m = caps.get(0).unwrap();
479                    handle_match(m.start()..m.end(), captures_to_dict(caps))?;
480                }
481            }
482        }
483
484        // Push the remainder.
485        output.push_str(&self[last_match..]);
486        Ok(output.into())
487    }
488
    /// Removes matches of a pattern from one or both sides of the string, once or
    /// repeatedly and returns the resulting string.
    #[func]
    pub fn trim(
        &self,
        /// The pattern to search for. If `{none}`, trims white spaces.
        #[default]
        pattern: Option<StrPattern>,
        /// Can be `{start}` or `{end}` to only trim the start or end of the
        /// string. If omitted, both sides are trimmed.
        #[named]
        at: Option<StrSide>,
        /// Whether to repeatedly remove matches of the pattern or just once.
        /// Defaults to `{true}`.
        #[named]
        #[default(true)]
        repeat: bool,
    ) -> Str {
        // Which sides to trim; both if `at` was omitted. `start` is mutable
        // because the regex branch switches it off once start-trimming is done.
        let mut start = matches!(at, Some(StrSide::Start) | None);
        let end = matches!(at, Some(StrSide::End) | None);

        let trimmed = match pattern {
            // Whitespace trimming; `repeat` is not consulted in this branch.
            None => match at {
                None => self.0.trim(),
                Some(StrSide::Start) => self.0.trim_start(),
                Some(StrSide::End) => self.0.trim_end(),
            },
            Some(StrPattern::Str(pat)) => {
                let pat = pat.as_str();
                let mut s = self.as_str();
                if repeat {
                    if start {
                        s = s.trim_start_matches(pat);
                    }
                    if end {
                        s = s.trim_end_matches(pat);
                    }
                } else {
                    // Remove at most one occurrence per side.
                    if start {
                        s = s.strip_prefix(pat).unwrap_or(s);
                    }
                    if end {
                        s = s.strip_suffix(pat).unwrap_or(s);
                    }
                }
                s
            }
            Some(StrPattern::Regex(re)) => {
                let s = self.as_str();
                // End offset of the previous match, if any.
                let mut last = None;
                // The byte range of `s` that is kept.
                let mut range = 0..s.len();

                for m in re.find_iter(s) {
                    // Does this match follow directly after the last one?
                    let consecutive = last == Some(m.start());

                    // As long as we're at the beginning or in a consecutive run
                    // of matches, and we're still trimming at the start, trim.
                    start &= m.start() == 0 || consecutive;
                    if start {
                        range.start = m.end();
                        start &= repeat;
                    }

                    // Reset end trim if we aren't consecutive anymore or aren't
                    // repeating.
                    if end && (!consecutive || !repeat) {
                        range.end = m.start();
                    }

                    last = Some(m.end());
                }

                // If the last match stops short of the end of the string,
                // there is nothing to trim at the end: keep everything up to
                // the end.
                if last.is_some_and(|last| last < s.len()) {
                    range.end = s.len();
                }

                // Clamp so that the slice is empty rather than inverted when
                // the start trim has moved past the end trim.
                &s[range.start..range.start.max(range.end)]
            }
        };

        trimmed.into()
    }
573
574    /// Splits a string at matches of a specified pattern and returns an array
575    /// of the resulting parts.
576    ///
577    /// When the empty string is used as a separator, it separates every
578    /// character (i.e., Unicode code point) in the string, along with the
579    /// beginning and end of the string. In practice, this means that the
580    /// resulting list of parts will contain the empty string at the start
581    /// and end of the list.
582    #[func]
583    pub fn split(
584        &self,
585        /// The pattern to split at. Defaults to whitespace.
586        #[default]
587        pattern: Option<StrPattern>,
588    ) -> Array {
589        let s = self.as_str();
590        match pattern {
591            None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
592            Some(StrPattern::Str(pat)) => {
593                s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
594            }
595            Some(StrPattern::Regex(re)) => {
596                re.split(s).map(|v| Value::Str(v.into())).collect()
597            }
598        }
599    }
600
601    /// Reverse the string.
602    #[func(title = "Reverse")]
603    pub fn rev(&self) -> Str {
604        let mut s = EcoString::with_capacity(self.0.len());
605        for grapheme in self.as_str().graphemes(true).rev() {
606            s.push_str(grapheme);
607        }
608        s.into()
609    }
610}
611
612impl Deref for Str {
613    type Target = str;
614
615    fn deref(&self) -> &str {
616        &self.0
617    }
618}
619
620impl Debug for Str {
621    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
622        Debug::fmt(self.as_str(), f)
623    }
624}
625
626impl Display for Str {
627    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
628        Display::fmt(self.as_str(), f)
629    }
630}
631
632impl Repr for Str {
633    fn repr(&self) -> EcoString {
634        self.as_ref().repr()
635    }
636}
637
638impl Repr for EcoString {
639    fn repr(&self) -> EcoString {
640        self.as_ref().repr()
641    }
642}
643
644impl Repr for str {
645    fn repr(&self) -> EcoString {
646        let mut r = EcoString::with_capacity(self.len() + 2);
647        r.push('"');
648        for c in self.chars() {
649            match c {
650                '\0' => r.push_str(r"\u{0}"),
651                '\'' => r.push('\''),
652                '"' => r.push_str(r#"\""#),
653                _ => r.extend(c.escape_debug()),
654            }
655        }
656        r.push('"');
657        r
658    }
659}
660
661impl Repr for char {
662    fn repr(&self) -> EcoString {
663        EcoString::from(*self).repr()
664    }
665}
666
667impl Add for Str {
668    type Output = Self;
669
670    fn add(mut self, rhs: Self) -> Self::Output {
671        self += rhs;
672        self
673    }
674}
675
676impl AddAssign for Str {
677    fn add_assign(&mut self, rhs: Self) {
678        self.0.push_str(rhs.as_str());
679    }
680}
681
682impl AsRef<str> for Str {
683    fn as_ref(&self) -> &str {
684        self
685    }
686}
687
688impl Borrow<str> for Str {
689    fn borrow(&self) -> &str {
690        self
691    }
692}
693
694impl From<char> for Str {
695    fn from(c: char) -> Self {
696        Self(c.into())
697    }
698}
699
700impl From<&str> for Str {
701    fn from(s: &str) -> Self {
702        Self(s.into())
703    }
704}
705
706impl From<EcoString> for Str {
707    fn from(s: EcoString) -> Self {
708        Self(s)
709    }
710}
711
712impl From<String> for Str {
713    fn from(s: String) -> Self {
714        Self(s.into())
715    }
716}
717
718impl From<Cow<'_, str>> for Str {
719    fn from(s: Cow<str>) -> Self {
720        Self(s.into())
721    }
722}
723
724impl FromIterator<char> for Str {
725    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
726        Self(iter.into_iter().collect())
727    }
728}
729
730impl From<Str> for EcoString {
731    fn from(str: Str) -> Self {
732        str.0
733    }
734}
735
736impl From<Str> for String {
737    fn from(s: Str) -> Self {
738        s.0.into()
739    }
740}
741
// Casting for `char`: a char turns into a string value, and a string turns
// back into a char only if it consists of exactly one codepoint.
cast! {
    char,
    self => Value::Str(self.into()),
    string: Str => {
        let mut chars = string.chars();
        // Pull two items so that both the empty string and strings with more
        // than one codepoint are rejected.
        match (chars.next(), chars.next()) {
            (Some(c), None) => c,
            _ => bail!("expected exactly one character"),
        }
    },
}
753
// A `&str` can be turned into a string value; no cast in the other direction
// is declared here.
cast! {
    &str,
    self => Value::Str(self.into()),
}
758
// `EcoString` converts to a string value and back from a `Str`.
cast! {
    EcoString,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
764
// `String` converts to a string value and back from a `Str`.
cast! {
    String,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
770
/// A value that can be cast to a string.
///
/// Integers are kept unformatted so that the string constructor can apply its
/// `base` argument; every other accepted value is already converted to a
/// string.
pub enum ToStr {
    /// A string value ready to be used as-is.
    Str(Str),
    /// An integer about to be formatted in a given base.
    Int(i64),
}
778
// Accepted inputs for the string constructor. Only integers stay unformatted;
// all other inputs are converted to a string right here.
cast! {
    ToStr,
    v: i64 => Self::Int(v),
    v: f64 => Self::Str(repr::display_float(v).into()),
    v: Decimal => Self::Str(format_str!("{}", v)),
    v: Version => Self::Str(format_str!("{}", v)),
    // Fails the cast if the bytes cannot be converted to a string.
    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?),
    v: Label => Self::Str(v.resolve().as_str().into()),
    v: Type => Self::Str(v.long_name().into()),
    v: Str => Self::Str(v),
}
790
791/// Convert an item of std's `match_indices` to a dictionary.
792fn match_to_dict((start, text): (usize, &str)) -> Dict {
793    dict! {
794        "start" => start,
795        "end" => start + text.len(),
796        "text" => text,
797        "captures" => Array::new(),
798    }
799}
800
801/// Convert regex captures to a dictionary.
802fn captures_to_dict(cap: regex::Captures) -> Dict {
803    let m = cap.get(0).expect("missing first match");
804    dict! {
805        "start" => m.start(),
806        "end" => m.end(),
807        "text" => m.as_str(),
808        "captures" =>  cap.iter()
809            .skip(1)
810            .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
811            .collect::<Array>(),
812    }
813}
814
815/// The out of bounds access error message.
816#[cold]
817fn out_of_bounds(index: i64, len: usize) -> EcoString {
818    eco_format!("string index out of bounds (index: {}, len: {})", index, len)
819}
820
821/// The out of bounds access error message when no default value was given.
822#[cold]
823fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
824    eco_format!("no default value was specified and string index out of bounds (index: {}, len: {})", index, len)
825}
826
827/// The char boundary access error message.
828#[cold]
829fn not_a_char_boundary(index: i64) -> EcoString {
830    eco_format!("string index {} is not a character boundary", index)
831}
832
833/// The error message when the string is empty.
834#[cold]
835fn string_is_empty() -> EcoString {
836    "string is empty".into()
837}
838
/// A regular expression.
///
/// Can be used as a [show rule selector]($styling/#show-rules) and with
/// [string methods]($str) like `find`, `split`, and `replace`.
///
/// [See here](https://docs.rs/regex/latest/regex/#syntax) for a specification
/// of the supported syntax.
///
/// # Example
/// ```example
/// // Works with string methods.
/// #"a,b;c".split(regex("[,;]"))
///
/// // Works with show rules.
/// #show regex("\d+"): set text(red)
///
/// The numbers 1 to 10.
/// ```
// Newtype around the `regex` crate's compiled regular expression.
#[ty(scope)]
#[derive(Debug, Clone)]
pub struct Regex(regex::Regex);
860
861impl Regex {
862    /// Create a new regular expression.
863    pub fn new(re: &str) -> StrResult<Self> {
864        regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
865    }
866}
867
868#[scope]
869impl Regex {
870    /// Create a regular expression from a string.
871    #[func(constructor)]
872    pub fn construct(
873        /// The regular expression as a string.
874        ///
875        /// Most regex escape sequences just work because they are not valid Typst
876        /// escape sequences. To produce regex escape sequences that are also valid in
877        /// Typst (e.g. `[\\]`), you need to escape twice. Thus, to match a verbatim
878        /// backslash, you would need to write `{regex("\\\\")}`.
879        ///
880        /// If you need many escape sequences, you can also create a raw element
881        /// and extract its text to use it for your regular expressions:
882        /// ```{regex(`\d+\.\d+\.\d+`.text)}```.
883        regex: Spanned<Str>,
884    ) -> SourceResult<Regex> {
885        Self::new(&regex.v).at(regex.span)
886    }
887}
888
889impl Deref for Regex {
890    type Target = regex::Regex;
891
892    fn deref(&self) -> &Self::Target {
893        &self.0
894    }
895}
896
897impl Repr for Regex {
898    fn repr(&self) -> EcoString {
899        eco_format!("regex({})", self.0.as_str().repr())
900    }
901}
902
903impl PartialEq for Regex {
904    fn eq(&self, other: &Self) -> bool {
905        self.0.as_str() == other.0.as_str()
906    }
907}
908
909impl Hash for Regex {
910    fn hash<H: Hasher>(&self, state: &mut H) {
911        self.0.as_str().hash(state);
912    }
913}
914
/// A pattern which can be searched for in a string.
///
/// Accepted by the pattern-based string methods, e.g. `contains`, `find`,
/// `replace`, `trim` and `split`.
#[derive(Debug, Clone)]
pub enum StrPattern {
    /// Just a string.
    Str(Str),
    /// A regular expression.
    Regex(Regex),
}
923
// A pattern casts to and from either a string or a regex value.
cast! {
    StrPattern,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Regex(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Regex => Self::Regex(v),
}
933
/// A side of a string.
///
/// Used by `trim` to restrict trimming to one side.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum StrSide {
    /// The logical start of the string, may be left or right depending on the
    /// language.
    Start,
    /// The logical end of the string.
    End,
}
943
// A side is given as an alignment value; only `start` and `end` are accepted,
// any other alignment fails the cast.
cast! {
    StrSide,
    v: Alignment => match v {
        Alignment::START => Self::Start,
        Alignment::END => Self::End,
        _ => bail!("expected either `start` or `end`"),
    },
}
952
/// A replacement for a matched [`Str`].
///
/// This is the `replacement` argument of `Str::replace`.
pub enum Replacement {
    /// A string a match is replaced with.
    Str(Str),
    /// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
    /// whose output is inserted for the match.
    Func(Func),
}
961
// A replacement casts to and from either a string or a function value.
cast! {
    Replacement,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Func(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Func => Self::Func(v)
}