Skip to main content

typst_library/foundations/
str.rs

1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::Spanned;
10use unicode_normalization::UnicodeNormalization;
11use unicode_segmentation::UnicodeSegmentation;
12
13use crate::diag::{At, SourceResult, StrResult, bail};
14use crate::engine::Engine;
15use crate::foundations::{
16    Array, Bytes, Cast, Context, Decimal, Dict, Func, IntoValue, Label, Repr, Type,
17    Value, Version, cast, dict, func, repr, scope, ty,
18};
19use crate::layout::Alignment;
20
/// Build a [`Str`] from format-string arguments.
///
/// The tokens are handed unchanged to `eco_format!` and the formatted result
/// is then converted into a [`Str`].
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($args:tt)*) => {{
        let formatted = $crate::foundations::eco_format!($($args)*);
        $crate::foundations::Str::from(formatted)
    }};
}
29
30#[doc(hidden)]
31pub use ecow::eco_format;
32
33#[doc(inline)]
34pub use crate::__format_str as format_str;
35
36/// A sequence of Unicode codepoints.
37///
38/// You can iterate over the grapheme clusters of the string using a
39/// @reference:scripting:loops[for loop]. Grapheme clusters are basically
40/// characters but keep together things that belong together, e.g. multiple
41/// codepoints that together form a flag emoji. Strings can be added with the
42/// `+` operator, @reference:scripting:blocks[joined together] and multiplied
43/// with integers.
44///
45/// Typst provides utility methods for string manipulation. Many of these
46/// methods (e.g., @str.split[`split`], @str.trim[`trim`] and
47/// @str.replace[`replace`]) operate on _patterns:_ A pattern can be either a
48/// string or a @regex[regular expression]. This makes the methods quite
49/// versatile.
50///
51/// All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
52/// zero-based and negative indices wrap around to the end of the string.
53///
54/// You can convert a value to a string with the `str` constructor.
55///
56/// = Example <example>
57/// ```example
58/// #"hello world!" \
59/// #"\"hello\n  world\"!" \
60/// #"1 2 3".split() \
61/// #"1,2;3".split(regex("[,;]")) \
62/// #(regex("\\d+") in "ten euros") \
63/// #(regex("\\d+") in "10 euros")
64/// ```
65///
66/// = #short-or-long[Escapes][Escape sequences] <escapes>
67/// Just like in markup, you can escape a few symbols in strings:
68/// - `[\\]` for a backslash
69/// - `[\"]` for a quote
70/// - `[\n]` for a newline
71/// - `[\r]` for a carriage return
72/// - `[\t]` for a tab
73/// - `[\u{1f600}]` for a hexadecimal Unicode escape sequence
// Newtype around an `EcoString`. Because of `#[serde(transparent)]`, it is
// (de)serialized exactly like the wrapped string.
#[ty(scope, cast, title = "String")]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
pub struct Str(EcoString);
79
80impl Str {
81    /// Create a new, empty string.
82    pub fn new() -> Self {
83        Self(EcoString::new())
84    }
85
86    /// Return `true` if the length is 0.
87    pub fn is_empty(&self) -> bool {
88        self.0.is_empty()
89    }
90
91    /// Repeat the string a number of times.
92    pub fn repeat(&self, n: usize) -> StrResult<Self> {
93        if self.0.len().checked_mul(n).is_none() {
94            return Err(eco_format!("cannot repeat this string {n} times"));
95        }
96        Ok(Self(self.0.repeat(n)))
97    }
98
99    /// A string slice containing the entire string.
100    pub fn as_str(&self) -> &str {
101        self
102    }
103
104    /// Resolve an index or throw an out of bounds error.
105    fn locate(&self, index: i64) -> StrResult<usize> {
106        self.locate_opt(index)?
107            .ok_or_else(|| out_of_bounds(index, self.len()))
108    }
109
110    /// Resolve an index, if it is within bounds and on a valid char boundary.
111    ///
112    /// `index == len` is considered in bounds.
113    fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
114        let wrapped =
115            if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
116
117        let resolved = wrapped
118            .and_then(|v| usize::try_from(v).ok())
119            .filter(|&v| v <= self.0.len());
120
121        if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
122            return Err(not_a_char_boundary(index));
123        }
124
125        Ok(resolved)
126    }
127}
128
129#[scope]
130impl Str {
131    /// Converts a value to a string.
132    ///
133    /// - Integers are formatted in base 10. This can be overridden with the
134    ///   optional `base` parameter.
135    /// - Floats are formatted in base 10 and never in exponential notation.
136    /// - Negative integers and floats are formatted with the Unicode minus sign
137    ///   ("−" U+2212) instead of the ASCII minus sign ("-" U+002D).
138    /// - From labels the name is extracted.
139    /// - Bytes are decoded as UTF-8.
140    ///
141    /// If you wish to convert from and to Unicode code points, see the
142    /// @str.to-unicode[`to-unicode`] and @str.from-unicode[`from-unicode`]
143    /// functions.
144    ///
145    /// ```example
146    /// #str(10) \
147    /// #str(4000, base: 16) \
148    /// #str(2.7) \
149    /// #str(1e8) \
150    /// #str(<intro>)
151    /// ```
152    #[func(constructor)]
153    pub fn construct(
154        /// The value that should be converted to a string.
155        value: ToStr,
156        /// The base (radix) to display integers in, between 2 and 36.
157        #[named]
158        #[default(Spanned::detached(Base::Default))]
159        base: Spanned<Base>,
160    ) -> SourceResult<Str> {
161        Ok(match value {
162            ToStr::Str(s) => {
163                if matches!(base.v, Base::User(_)) {
164                    bail!(base.span, "base is only supported for integers");
165                }
166                s
167            }
168            ToStr::Int(n) => {
169                let b = base.v.value();
170                if b == 1 && n > 0 {
171                    bail!(
172                        base.span, "base must be between 2 and 36";
173                        hint: "generate a unary representation with `\"1\" * {n}`";
174                    );
175                }
176                if b < 2 || b > 36 {
177                    bail!(base.span, "base must be between 2 and 36");
178                }
179                repr::format_int_with_base(n, b).into()
180            }
181        })
182    }
183
184    /// The length of the string in UTF-8 encoded bytes.
185    #[func(title = "Length")]
186    pub fn len(&self) -> usize {
187        self.0.len()
188    }
189
190    /// Extracts the first grapheme cluster of the string.
191    ///
192    /// Returns the provided default value if the string is empty or fails with
193    /// an error if no default value was specified.
194    #[func]
195    pub fn first(
196        &self,
197        /// A default value to return if the string is empty.
198        #[named]
199        default: Option<Str>,
200    ) -> StrResult<Str> {
201        self.0
202            .graphemes(true)
203            .next()
204            .map(Into::into)
205            .or(default)
206            .ok_or_else(string_is_empty)
207    }
208
209    /// Extracts the last grapheme cluster of the string.
210    ///
211    /// Returns the provided default value if the string is empty or fails with
212    /// an error if no default value was specified.
213    #[func]
214    pub fn last(
215        &self,
216        /// A default value to return if the string is empty.
217        #[named]
218        default: Option<Str>,
219    ) -> StrResult<Str> {
220        self.0
221            .graphemes(true)
222            .next_back()
223            .map(Into::into)
224            .or(default)
225            .ok_or_else(string_is_empty)
226    }
227
228    /// Extracts the first grapheme cluster after the specified index. Returns
229    /// the default value if the index is out of bounds or fails with an error
230    /// if no default value was specified.
231    #[func]
232    pub fn at(
233        &self,
234        /// The byte index. If negative, indexes from the back.
235        index: i64,
236        /// A default value to return if the index is out of bounds.
237        #[named]
238        default: Option<Value>,
239    ) -> StrResult<Value> {
240        let len = self.len();
241        self.locate_opt(index)?
242            .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
243            .or(default)
244            .ok_or_else(|| no_default_and_out_of_bounds(index, len))
245    }
246
247    /// Extracts a substring of the string. Fails with an error if the start or
248    /// end index is out of bounds.
249    #[func]
250    pub fn slice(
251        &self,
252        /// The start byte index (inclusive). If negative, indexes from the
253        /// back.
254        start: i64,
255        /// The end byte index (exclusive). If omitted, the whole slice until
256        /// the end of the string is extracted. If negative, indexes from the
257        /// back.
258        #[default]
259        end: Option<i64>,
260        /// The number of bytes to extract. This is equivalent to passing
261        /// `start + count` as the `end` position. Mutually exclusive with
262        /// `end`.
263        #[named]
264        count: Option<i64>,
265    ) -> StrResult<Str> {
266        if end.is_some() && count.is_some() {
267            bail!("`end` and `count` are mutually exclusive");
268        }
269        let start = self.locate(start)?;
270        let end = end.or(count.map(|c| start as i64 + c));
271        let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
272        Ok(self.0[start..end].into())
273    }
274
275    /// Returns the grapheme clusters of the string as an array of substrings.
276    #[func]
277    pub fn clusters(&self) -> Array {
278        self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
279    }
280
281    /// Returns the Unicode codepoints of the string as an array of substrings.
282    #[func]
283    pub fn codepoints(&self) -> Array {
284        self.chars().map(|c| Value::Str(c.into())).collect()
285    }
286
287    /// Converts a character into its corresponding code point.
288    ///
289    /// ```example
290    /// #"a".to-unicode() \
291    /// #("a\u{0300}"
292    ///    .codepoints()
293    ///    .map(str.to-unicode))
294    /// ```
295    #[func]
296    pub fn to_unicode(
297        /// The character that should be converted.
298        character: char,
299    ) -> u32 {
300        character as u32
301    }
302
    /// Converts a Unicode code point into its corresponding string.
304    ///
305    /// ```example
306    /// #str.from-unicode(97)
307    /// ```
308    #[func]
309    pub fn from_unicode(
310        /// The code point that should be converted.
311        value: u32,
312    ) -> StrResult<Str> {
313        let c: char = value
314            .try_into()
315            .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
316        Ok(c.into())
317    }
318
319    /// Normalizes the string to the given Unicode normal form.
320    ///
321    /// This is useful when manipulating strings containing Unicode combining
322    /// characters.
323    ///
324    /// ```typ
325    /// #assert.eq("é".normalize(form: "nfd"), "e\u{0301}")
326    /// #assert.eq("ſ́".normalize(form: "nfkc"), "ś")
327    /// ```
328    #[func]
329    pub fn normalize(
330        &self,
331        #[named]
332        #[default(UnicodeNormalForm::Nfc)]
333        form: UnicodeNormalForm,
334    ) -> Str {
335        match form {
336            UnicodeNormalForm::Nfc => self.nfc().collect(),
337            UnicodeNormalForm::Nfd => self.nfd().collect(),
338            UnicodeNormalForm::Nfkc => self.nfkc().collect(),
339            UnicodeNormalForm::Nfkd => self.nfkd().collect(),
340        }
341    }
342
343    /// Whether the string contains the specified pattern.
344    ///
345    /// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
346    /// instead of `{"abcd".contains("bc")}`.
347    #[func]
348    pub fn contains(
349        &self,
350        /// The pattern to search for.
351        pattern: StrPattern,
352    ) -> bool {
353        match pattern {
354            StrPattern::Str(pat) => self.0.contains(pat.as_str()),
355            StrPattern::Regex(re) => re.is_match(self),
356        }
357    }
358
359    /// Whether the string starts with the specified pattern.
360    #[func]
361    pub fn starts_with(
362        &self,
363        /// The pattern the string might start with.
364        pattern: StrPattern,
365    ) -> bool {
366        match pattern {
367            StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
368            StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
369        }
370    }
371
372    /// Whether the string ends with the specified pattern.
373    #[func]
374    pub fn ends_with(
375        &self,
376        /// The pattern the string might end with.
377        pattern: StrPattern,
378    ) -> bool {
379        match pattern {
380            StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
381            StrPattern::Regex(re) => {
382                let mut start_byte = 0;
383                while let Some(mat) = re.find_at(self, start_byte) {
384                    if mat.end() == self.0.len() {
385                        return true;
386                    }
387
388                    // There might still be a match overlapping this one, so
389                    // restart at the next code point.
390                    let Some(c) = self[mat.start()..].chars().next() else { break };
391                    start_byte = mat.start() + c.len_utf8();
392                }
393                false
394            }
395        }
396    }
397
398    /// Searches for the specified pattern in the string and returns the first
399    /// match as a string or `{none}` if there is no match.
400    #[func]
401    pub fn find(
402        &self,
403        /// The pattern to search for.
404        pattern: StrPattern,
405    ) -> Option<Str> {
406        match pattern {
407            StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
408            StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
409        }
410    }
411
412    /// Searches for the specified pattern in the string and returns the index
413    /// of the first match as an integer or `{none}` if there is no match.
414    #[func]
415    pub fn position(
416        &self,
417        /// The pattern to search for.
418        pattern: StrPattern,
419    ) -> Option<usize> {
420        match pattern {
421            StrPattern::Str(pat) => self.0.find(pat.as_str()),
422            StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
423        }
424    }
425
426    /// Searches for the specified pattern in the string and returns a
427    /// dictionary with details about the first match or `{none}` if there is no
428    /// match.
429    ///
430    /// The returned dictionary has the following keys:
431    /// - `start`: The start offset of the match
432    /// - `end`: The end offset of the match
433    /// - `text`: The text that matched.
    /// - `captures`: An array containing a string for each matched capturing
    ///   group. The first item of the array contains the first matched
    ///   capturing group, not the whole match! This is empty unless the
    ///   `pattern` was a regex with capturing groups.
438    ///
439    /// #example(
440    ///   title: "Shape of the returned dictionary",
441    ///   ```
442    ///   #let pat = regex("not (a|an) (apple|cat)")
443    ///   #"I'm a doctor, not an apple.".match(pat) \
444    ///   #"I am not a cat!".match(pat)
445    ///   ```
446    /// )
447    ///
448    /// #example(
449    ///   title: "Different kinds of patterns",
450    ///   ```
451    ///   #assert.eq("Is there a".match("for this?"), none)
452    ///   #"The time of my life.".match(regex("[mit]+e"))
453    ///   ```
454    /// )
455    #[func]
456    pub fn match_(
457        &self,
458        /// The pattern to search for.
459        pattern: StrPattern,
460    ) -> Option<Dict> {
461        match pattern {
462            StrPattern::Str(pat) => {
463                self.0.match_indices(pat.as_str()).next().map(match_to_dict)
464            }
465            StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
466        }
467    }
468
469    /// Searches for the specified pattern in the string and returns an array of
470    /// dictionaries with details about all matches. For details about the
471    /// returned dictionaries, see @str.match[above].
472    ///
473    /// ```example
474    /// #"Day by Day.".matches("Day")
475    /// ```
476    #[func]
477    pub fn matches(
478        &self,
479        /// The pattern to search for.
480        pattern: StrPattern,
481    ) -> Array {
482        match pattern {
483            StrPattern::Str(pat) => self
484                .0
485                .match_indices(pat.as_str())
486                .map(match_to_dict)
487                .map(Value::Dict)
488                .collect(),
489            StrPattern::Regex(re) => re
490                .captures_iter(self)
491                .map(captures_to_dict)
492                .map(Value::Dict)
493                .collect(),
494        }
495    }
496
497    /// Replace at most `count` occurrences of the given pattern with a
498    /// replacement string or function (beginning from the start). If no count
499    /// is given, all occurrences are replaced.
500    #[func]
    pub fn replace(
        &self,
        engine: &mut Engine,
        context: Tracked<Context>,
        /// The pattern to search for.
        pattern: StrPattern,
        /// The string to replace the matches with or a function that gets a
        /// dictionary for each match and can return individual replacement
        /// strings.
        ///
        /// The dictionary passed to the function has the same shape as the
        /// dictionary returned by @str.match[`match`].
        replacement: Replacement,
        /// If given, only the first `count` matches of the pattern are
        /// replaced.
        #[named]
        count: Option<usize>,
    ) -> SourceResult<Str> {
        // Heuristic: Assume the new string is about the same length as
        // the current string.
        let mut output = EcoString::with_capacity(self.as_str().len());

        // Replace one match of a pattern with the replacement.
        //
        // `last_match` is the byte offset just past the previously handled
        // match; the closure updates it and `output` on every call, so matches
        // must be passed in ascending order.
        let mut last_match = 0;
        let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
            // Push everything until the match.
            output.push_str(&self[last_match..range.start]);
            last_match = range.end;

            // Determine and push the replacement.
            match &replacement {
                Replacement::Str(s) => output.push_str(s),
                Replacement::Func(func) => {
                    // The function receives the match dictionary and must
                    // return a string; a failed cast is reported at the
                    // function's span.
                    let piece = func
                        .call(engine, context, [dict])?
                        .cast::<Str>()
                        .at(func.span())?;
                    output.push_str(&piece);
                }
            }

            Ok(())
        };

        // Iterate over the matches of the `pattern`.
        //
        // Without an explicit `count`, `usize::MAX` makes `take` a no-op so
        // that all matches are replaced.
        let count = count.unwrap_or(usize::MAX);
        match &pattern {
            StrPattern::Str(pat) => {
                for m in self.match_indices(pat.as_str()).take(count) {
                    let (start, text) = m;
                    handle_match(start..start + text.len(), match_to_dict(m))?;
                }
            }
            StrPattern::Regex(re) => {
                for caps in re.captures_iter(self).take(count) {
                    // Extract the entire match over all capture groups.
                    let m = caps.get(0).unwrap();
                    handle_match(m.start()..m.end(), captures_to_dict(caps))?;
                }
            }
        }

        // Push the remainder.
        output.push_str(&self[last_match..]);
        Ok(output.into())
    }
567
568    /// Removes matches of a pattern from one or both sides of the string, once
569    /// or repeatedly and returns the resulting string.
570    #[func]
    pub fn trim(
        &self,
        /// The pattern to search for. If `{none}`, trims white spaces.
        #[default]
        pattern: Option<StrPattern>,
        /// Can be `{start}` or `{end}` to only trim the start or end of the
        /// string. If omitted, both sides are trimmed.
        #[named]
        at: Option<StrSide>,
        /// Whether to repeatedly remove matches of the pattern or just once.
        /// Defaults to `{true}`.
        #[named]
        #[default(true)]
        repeat: bool,
    ) -> Str {
        // Which sides to trim; omitting `at` selects both.
        let mut start = matches!(at, Some(StrSide::Start) | None);
        let end = matches!(at, Some(StrSide::End) | None);

        let trimmed = match pattern {
            // Whitespace trimming; `repeat` is not consulted in this branch.
            None => match at {
                None => self.0.trim(),
                Some(StrSide::Start) => self.0.trim_start(),
                Some(StrSide::End) => self.0.trim_end(),
            },
            Some(StrPattern::Str(pat)) => {
                let pat = pat.as_str();
                let mut s = self.as_str();
                if repeat {
                    // Strip as many consecutive occurrences as there are.
                    if start {
                        s = s.trim_start_matches(pat);
                    }
                    if end {
                        s = s.trim_end_matches(pat);
                    }
                } else {
                    // Strip at most one occurrence per side.
                    if start {
                        s = s.strip_prefix(pat).unwrap_or(s);
                    }
                    if end {
                        s = s.strip_suffix(pat).unwrap_or(s);
                    }
                }
                s
            }
            Some(StrPattern::Regex(re)) => {
                let s = self.as_str();
                // End offset of the previous match, if any.
                let mut last = None;
                // Byte range of `s` that is kept; narrowed while walking the
                // matches from left to right.
                let mut range = 0..s.len();

                for m in re.find_iter(s) {
                    // Does this match follow directly after the last one?
                    let consecutive = last == Some(m.start());

                    // As long as we're at the beginning or in a consecutive run
                    // of matches, and we're still trimming at the start, trim.
                    start &= m.start() == 0 || consecutive;
                    if start {
                        range.start = m.end();
                        start &= repeat;
                    }

                    // Reset end trim if we aren't consecutive anymore or aren't
                    // repeating.
                    if end && (!consecutive || !repeat) {
                        range.end = m.start();
                    }

                    last = Some(m.end());
                }

                // Is the last match directly at the end?
                // If it is not, nothing is trimmed from the end.
                if last.is_some_and(|last| last < s.len()) {
                    range.end = s.len();
                }

                // Clamp the end so that it never lies before the start.
                &s[range.start..range.start.max(range.end)]
            }
        };

        trimmed.into()
    }
652
653    /// Splits a string at matches of a specified pattern and returns an array
654    /// of the resulting parts.
655    ///
656    /// When the empty string is used as a separator, it separates every
657    /// character (i.e., Unicode code point) in the string, along with the
658    /// beginning and end of the string. In practice, this means that the
659    /// resulting list of parts will contain the empty string at the start and
660    /// end of the list.
661    #[func]
662    pub fn split(
663        &self,
664        /// The pattern to split at. Defaults to whitespace.
665        #[default]
666        pattern: Option<StrPattern>,
667    ) -> Array {
668        let s = self.as_str();
669        match pattern {
670            None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
671            Some(StrPattern::Str(pat)) => {
672                s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
673            }
674            Some(StrPattern::Regex(re)) => {
675                re.split(s).map(|v| Value::Str(v.into())).collect()
676            }
677        }
678    }
679
680    /// Reverses the string.
681    ///
682    /// More specifically, this returns a string with the same grapheme
683    /// clusters, in reversed order.
684    ///
685    /// ```example
686    /// #"Pirate flag: 🏴‍☠️".rev()
687    /// ```
688    #[func(title = "Reverse")]
689    pub fn rev(&self) -> Str {
690        let mut s = EcoString::with_capacity(self.0.len());
691        for grapheme in self.as_str().graphemes(true).rev() {
692            s.push_str(grapheme);
693        }
694        s.into()
695    }
696}
697
698impl Deref for Str {
699    type Target = str;
700
701    fn deref(&self) -> &str {
702        &self.0
703    }
704}
705
706impl Debug for Str {
707    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
708        Debug::fmt(self.as_str(), f)
709    }
710}
711
712impl Display for Str {
713    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
714        Display::fmt(self.as_str(), f)
715    }
716}
717
718impl Repr for Str {
719    fn repr(&self) -> EcoString {
720        self.as_str().repr()
721    }
722}
723
724impl Repr for EcoString {
725    fn repr(&self) -> EcoString {
726        self.as_str().repr()
727    }
728}
729
730impl Repr for str {
731    fn repr(&self) -> EcoString {
732        let mut r = EcoString::with_capacity(self.len() + 2);
733        r.push('"');
734        for c in self.chars() {
735            match c {
736                '\0' => r.push_str(r"\u{0}"),
737                '\'' => r.push('\''),
738                '"' => r.push_str(r#"\""#),
739                _ => r.extend(c.escape_debug()),
740            }
741        }
742        r.push('"');
743        r
744    }
745}
746
747impl Repr for char {
748    fn repr(&self) -> EcoString {
749        EcoString::from(*self).repr()
750    }
751}
752
753impl Add for Str {
754    type Output = Self;
755
756    fn add(mut self, rhs: Self) -> Self::Output {
757        self += rhs;
758        self
759    }
760}
761
762impl AddAssign for Str {
763    fn add_assign(&mut self, rhs: Self) {
764        self.0.push_str(rhs.as_str());
765    }
766}
767
768impl AsRef<str> for Str {
769    fn as_ref(&self) -> &str {
770        self
771    }
772}
773
774impl Borrow<str> for Str {
775    fn borrow(&self) -> &str {
776        self
777    }
778}
779
780impl From<char> for Str {
781    fn from(c: char) -> Self {
782        Self(c.into())
783    }
784}
785
786impl From<&str> for Str {
787    fn from(s: &str) -> Self {
788        Self(s.into())
789    }
790}
791
792impl From<EcoString> for Str {
793    fn from(s: EcoString) -> Self {
794        Self(s)
795    }
796}
797
798impl From<String> for Str {
799    fn from(s: String) -> Self {
800        Self(s.into())
801    }
802}
803
804impl From<Cow<'_, str>> for Str {
805    fn from(s: Cow<str>) -> Self {
806        Self(s.into())
807    }
808}
809
810impl FromIterator<char> for Str {
811    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
812        Self(iter.into_iter().collect())
813    }
814}
815
816impl From<Str> for EcoString {
817    fn from(str: Str) -> Self {
818        str.0
819    }
820}
821
822impl From<Str> for String {
823    fn from(s: Str) -> Self {
824        s.0.into()
825    }
826}
827
828cast! {
829    char,
830    self => Value::Str(self.into()),
831    string: Str => {
832        let mut chars = string.chars();
833        match (chars.next(), chars.next()) {
834            (Some(c), None) => c,
835            _ => bail!("expected exactly one character"),
836        }
837    },
838}
839
840cast! {
841    &str,
842    self => Value::Str(self.into()),
843}
844
845cast! {
846    EcoString,
847    self => Value::Str(self.into()),
848    v: Str => v.into(),
849}
850
851cast! {
852    String,
853    self => Value::Str(self.into()),
854    v: Str => v.into(),
855}
856
857/// A value that can be cast to a string.
858pub enum ToStr {
859    /// A string value ready to be used as-is.
860    Str(Str),
861    /// An integer about to be formatted in a given base.
862    Int(i64),
863}
864
cast! {
    ToStr,
    // Integers stay integers so that a base can still be applied later.
    v: i64 => Self::Int(v),
    // All remaining inputs are turned into text right away.
    v: f64 => Self::Str(repr::display_float(v).into()),
    v: Decimal => Self::Str(format_str!("{}", v)),
    v: Version => Self::Str(format_str!("{}", v)),
    // Fails with the given message if `to_str` reports an error.
    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid UTF-8")?),
    v: Label => Self::Str(v.resolve().as_str().into()),
    v: Type => Self::Str(v.long_name().into()),
    v: Str => Self::Str(v),
}
876
/// The radix used when formatting an integer.
///
/// Behaves much like `Option<i64>`, except that the absent case stands for
/// `10` instead of `none`, which lets the documented default be correct.
#[derive(Debug, Copy, Clone)]
pub enum Base {
    /// No base was given; stands for base 10.
    Default,
    /// A base that was given explicitly.
    User(i64),
}

impl Base {
    /// The effective radix: the given one, or `10` if none was given.
    pub fn value(self) -> i64 {
        match self {
            Self::User(radix) => radix,
            Self::Default => 10,
        }
    }
}
893
cast! {
    Base,
    // Converting back into a value yields the effective radix, i.e. `10` for
    // `Base::Default`.
    self => self.value().into_value(),
    // Any integer is accepted here; the 2 to 36 range check happens in
    // `Str::construct`.
    v: i64 => Self::User(v),
}
899
/// A Unicode normalization form.
// Each `#[string(..)]` attribute names the string spelling of its variant
// (e.g. `"nfd"` as in `normalize(form: "nfd")`). Presumably consumed by the
// `Cast` derive; confirm against the derive's implementation.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum UnicodeNormalForm {
    /// Canonical composition where e.g. accented letters are turned into a
    /// single Unicode codepoint.
    #[string("nfc")]
    Nfc,
    /// Canonical decomposition where e.g. accented letters are split into a
    /// separate base and diacritic.
    #[string("nfd")]
    Nfd,
    /// Like NFC, but using the Unicode compatibility decompositions.
    #[string("nfkc")]
    Nfkc,
    /// Like NFD, but using the Unicode compatibility decompositions.
    #[string("nfkd")]
    Nfkd,
}
918
919/// Convert an item of std's `match_indices` to a dictionary.
920fn match_to_dict((start, text): (usize, &str)) -> Dict {
921    dict! {
922        "start" => start,
923        "end" => start + text.len(),
924        "text" => text,
925        "captures" => Array::new(),
926    }
927}
928
929/// Convert regex captures to a dictionary.
930fn captures_to_dict(cap: regex::Captures) -> Dict {
931    let m = cap.get(0).expect("missing first match");
932    dict! {
933        "start" => m.start(),
934        "end" => m.end(),
935        "text" => m.as_str(),
936        "captures" =>  cap.iter()
937            .skip(1)
938            .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
939            .collect::<Array>(),
940    }
941}
942
943/// The out of bounds access error message.
944#[cold]
945fn out_of_bounds(index: i64, len: usize) -> EcoString {
946    eco_format!("string index out of bounds (index: {index}, len: {len})")
947}
948
949/// The out of bounds access error message when no default value was given.
950#[cold]
951fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
952    eco_format!(
953        "no default value was specified and string index out of bounds \
954         (index: {index}, len: {len})"
955    )
956}
957
958/// The char boundary access error message.
959#[cold]
960fn not_a_char_boundary(index: i64) -> EcoString {
961    eco_format!("string index {index} is not a character boundary")
962}
963
964/// The error message when the string is empty.
965#[cold]
966fn string_is_empty() -> EcoString {
967    "string is empty".into()
968}
969
970/// A regular expression.
971///
972/// Can be used as a @reference:styling:show-rules[show rule selector] and with
973/// @str[string methods] like `find`, `split`, `replace`, and `match`.
974///
975/// #link("https://docs.rs/regex/latest/regex/#syntax")[See here] for a
976/// specification of the supported syntax.
977///
978/// = Example <example>
979/// ```example
980/// // Works with string methods.
981/// #"a,b;c".split(regex("[,;]"))
982///
983/// // Works with show rules.
984/// #show regex("\\d+"): set text(red)
985///
986/// The numbers 1 to 10.
987/// ```
988#[ty(scope)]
989#[derive(Debug, Clone)]
990pub struct Regex(regex::Regex);
991
992impl Regex {
993    /// Create a new regular expression.
994    pub fn new(re: &str) -> StrResult<Self> {
995        regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
996    }
997}
998
999#[scope]
1000impl Regex {
1001    /// Create a regular expression from a string.
1002    #[func(constructor)]
1003    pub fn construct(
1004        /// The regular expression as a string.
1005        ///
1006        /// Both Typst strings and regular expressions use backslashes for
1007        /// escaping. To produce a regex escape sequence that is also valid in
1008        /// Typst, you need to escape the backslash itself (e.g., writing
1009        /// `{regex("\\\\")}` for the regex `\\`). Regex escape sequences that
1010        /// are not valid Typst escape sequences (e.g., `\d` and `\b`) can be
1011        /// entered into strings directly, but it's good practice to still
1012        /// escape them to avoid ambiguity (i.e., `{regex("\\b\\d")}`). See the
1013        /// @str:escapes[list of valid string escape sequences].
1014        ///
1015        /// If you need many escape sequences, you can also create a raw element
1016        /// and extract its text to use it for your regular expressions:
1017        /// ``` {regex(`\d+\.\d+\.\d+`.text)}```.
1018        regex: Spanned<Str>,
1019    ) -> SourceResult<Regex> {
1020        Self::new(&regex.v).at(regex.span)
1021    }
1022}
1023
1024impl Deref for Regex {
1025    type Target = regex::Regex;
1026
1027    fn deref(&self) -> &Self::Target {
1028        &self.0
1029    }
1030}
1031
1032impl Repr for Regex {
1033    fn repr(&self) -> EcoString {
1034        eco_format!("regex({})", self.0.as_str().repr())
1035    }
1036}
1037
1038impl PartialEq for Regex {
1039    fn eq(&self, other: &Self) -> bool {
1040        self.0.as_str() == other.0.as_str()
1041    }
1042}
1043
1044impl Hash for Regex {
1045    fn hash<H: Hasher>(&self, state: &mut H) {
1046        self.0.as_str().hash(state);
1047    }
1048}
1049
1050/// A pattern which can be searched for in a string.
1051#[derive(Debug, Clone)]
1052pub enum StrPattern {
1053    /// Just a string.
1054    Str(Str),
1055    /// A regular expression.
1056    Regex(Regex),
1057}
1058
1059cast! {
1060    StrPattern,
1061    self => match self {
1062        Self::Str(v) => v.into_value(),
1063        Self::Regex(v) => v.into_value(),
1064    },
1065    v: Str => Self::Str(v),
1066    v: Regex => Self::Regex(v),
1067}
1068
/// One of the two sides of a string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum StrSide {
    /// The logical start of the string. Depending on the language, this may
    /// be the left or the right side.
    Start,
    /// The logical end of the string.
    End,
}
1078
1079cast! {
1080    StrSide,
1081    v: Alignment => match v {
1082        Alignment::START => Self::Start,
1083        Alignment::END => Self::End,
1084        _ => bail!("expected either `start` or `end`"),
1085    },
1086}
1087
1088/// A replacement for a matched [`Str`]
1089pub enum Replacement {
1090    /// A string a match is replaced with.
1091    Str(Str),
1092    /// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
1093    /// whose output is inserted for the match.
1094    Func(Func),
1095}
1096
1097cast! {
1098    Replacement,
1099    self => match self {
1100        Self::Str(v) => v.into_value(),
1101        Self::Func(v) => v.into_value(),
1102    },
1103    v: Str => Self::Str(v),
1104    v: Func => Self::Func(v)
1105}