typst_library/foundations/
str.rs

1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::{Span, Spanned};
10use unicode_normalization::UnicodeNormalization;
11use unicode_segmentation::UnicodeSegmentation;
12
13use crate::diag::{At, SourceResult, StrResult, bail};
14use crate::engine::Engine;
15use crate::foundations::{
16    Array, Bytes, Cast, Context, Decimal, Dict, Func, IntoValue, Label, Repr, Type,
17    Value, Version, cast, dict, func, repr, scope, ty,
18};
19use crate::layout::Alignment;
20
/// Create a new [`Str`] from a format string.
///
/// The arguments are forwarded unchanged to `eco_format!` and the result is
/// converted into a [`Str`].
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($args:tt)*) => {{
        $crate::foundations::Str::from($crate::foundations::eco_format!($($args)*))
    }};
}
29
30#[doc(hidden)]
31pub use ecow::eco_format;
32
33#[doc(inline)]
34pub use crate::__format_str as format_str;
35
36/// A sequence of Unicode codepoints.
37///
38/// You can iterate over the grapheme clusters of the string using a [for
39/// loop]($scripting/#loops). Grapheme clusters are basically characters but
40/// keep together things that belong together, e.g. multiple codepoints that
41/// together form a flag emoji. Strings can be added with the `+` operator,
42/// [joined together]($scripting/#blocks) and multiplied with integers.
43///
44/// Typst provides utility methods for string manipulation. Many of these
45/// methods (e.g., [`split`]($str.split), [`trim`]($str.trim) and
46/// [`replace`]($str.replace)) operate on _patterns:_ A pattern can be either a
47/// string or a [regular expression]($regex). This makes the methods quite
48/// versatile.
49///
50/// All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
51/// zero-based and negative indices wrap around to the end of the string.
52///
53/// You can convert a value to a string with this type's constructor.
54///
55/// # Example
56/// ```example
57/// #"hello world!" \
58/// #"\"hello\n  world\"!" \
59/// #"1 2 3".split() \
60/// #"1,2;3".split(regex("[,;]")) \
61/// #(regex("\d+") in "ten euros") \
62/// #(regex("\d+") in "10 euros")
63/// ```
64///
65/// # Escape sequences { #escapes }
66/// Just like in markup, you can escape a few symbols in strings:
67/// - `[\\]` for a backslash
68/// - `[\"]` for a quote
69/// - `[\n]` for a newline
70/// - `[\r]` for a carriage return
71/// - `[\t]` for a tab
72/// - `[\u{1f600}]` for a hexadecimal Unicode escape sequence
73#[ty(scope, cast, title = "String")]
74#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
75#[derive(Serialize, Deserialize)]
76#[serde(transparent)]
77pub struct Str(EcoString);
78
79impl Str {
80    /// Create a new, empty string.
81    pub fn new() -> Self {
82        Self(EcoString::new())
83    }
84
85    /// Return `true` if the length is 0.
86    pub fn is_empty(&self) -> bool {
87        self.0.is_empty()
88    }
89
90    /// Repeat the string a number of times.
91    pub fn repeat(&self, n: usize) -> StrResult<Self> {
92        if self.0.len().checked_mul(n).is_none() {
93            return Err(eco_format!("cannot repeat this string {n} times"));
94        }
95        Ok(Self(self.0.repeat(n)))
96    }
97
98    /// A string slice containing the entire string.
99    pub fn as_str(&self) -> &str {
100        self
101    }
102
103    /// Resolve an index or throw an out of bounds error.
104    fn locate(&self, index: i64) -> StrResult<usize> {
105        self.locate_opt(index)?
106            .ok_or_else(|| out_of_bounds(index, self.len()))
107    }
108
109    /// Resolve an index, if it is within bounds and on a valid char boundary.
110    ///
111    /// `index == len` is considered in bounds.
112    fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
113        let wrapped =
114            if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
115
116        let resolved = wrapped
117            .and_then(|v| usize::try_from(v).ok())
118            .filter(|&v| v <= self.0.len());
119
120        if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
121            return Err(not_a_char_boundary(index));
122        }
123
124        Ok(resolved)
125    }
126}
127
128#[scope]
129impl Str {
130    /// Converts a value to a string.
131    ///
132    /// - Integers are formatted in base 10. This can be overridden with the
133    ///   optional `base` parameter.
134    /// - Floats are formatted in base 10 and never in exponential notation.
135    /// - Negative integers and floats are formatted with the Unicode minus sign
136    ///   ("−" U+2212) instead of the ASCII minus sign ("-" U+002D).
137    /// - From labels the name is extracted.
138    /// - Bytes are decoded as UTF-8.
139    ///
140    /// If you wish to convert from and to Unicode code points, see the
141    /// [`to-unicode`]($str.to-unicode) and [`from-unicode`]($str.from-unicode)
142    /// functions.
143    ///
144    /// ```example
145    /// #str(10) \
146    /// #str(4000, base: 16) \
147    /// #str(2.7) \
148    /// #str(1e8) \
149    /// #str(<intro>)
150    /// ```
151    #[func(constructor)]
152    pub fn construct(
153        /// The value that should be converted to a string.
154        value: ToStr,
155        /// The base (radix) to display integers in, between 2 and 36.
156        #[named]
157        #[default(Spanned::new(10, Span::detached()))]
158        base: Spanned<i64>,
159    ) -> SourceResult<Str> {
160        Ok(match value {
161            ToStr::Str(s) => {
162                if base.v != 10 {
163                    bail!(base.span, "base is only supported for integers");
164                }
165                s
166            }
167            ToStr::Int(n) => {
168                if base.v < 2 || base.v > 36 {
169                    bail!(base.span, "base must be between 2 and 36");
170                }
171                repr::format_int_with_base(n, base.v).into()
172            }
173        })
174    }
175
176    /// The length of the string in UTF-8 encoded bytes.
177    #[func(title = "Length")]
178    pub fn len(&self) -> usize {
179        self.0.len()
180    }
181
182    /// Extracts the first grapheme cluster of the string.
183    ///
184    /// Returns the provided default value if the string is empty or fails with
185    /// an error if no default value was specified.
186    #[func]
187    pub fn first(
188        &self,
189        /// A default value to return if the string is empty.
190        #[named]
191        default: Option<Str>,
192    ) -> StrResult<Str> {
193        self.0
194            .graphemes(true)
195            .next()
196            .map(Into::into)
197            .or(default)
198            .ok_or_else(string_is_empty)
199    }
200
201    /// Extracts the last grapheme cluster of the string.
202    ///
203    /// Returns the provided default value if the string is empty or fails with
204    /// an error if no default value was specified.
205    #[func]
206    pub fn last(
207        &self,
208        /// A default value to return if the string is empty.
209        #[named]
210        default: Option<Str>,
211    ) -> StrResult<Str> {
212        self.0
213            .graphemes(true)
214            .next_back()
215            .map(Into::into)
216            .or(default)
217            .ok_or_else(string_is_empty)
218    }
219
220    /// Extracts the first grapheme cluster after the specified index. Returns
221    /// the default value if the index is out of bounds or fails with an error
222    /// if no default value was specified.
223    #[func]
224    pub fn at(
225        &self,
226        /// The byte index. If negative, indexes from the back.
227        index: i64,
228        /// A default value to return if the index is out of bounds.
229        #[named]
230        default: Option<Value>,
231    ) -> StrResult<Value> {
232        let len = self.len();
233        self.locate_opt(index)?
234            .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
235            .or(default)
236            .ok_or_else(|| no_default_and_out_of_bounds(index, len))
237    }
238
239    /// Extracts a substring of the string.
240    /// Fails with an error if the start or end index is out of bounds.
241    #[func]
242    pub fn slice(
243        &self,
244        /// The start byte index (inclusive). If negative, indexes from the
245        /// back.
246        start: i64,
247        /// The end byte index (exclusive). If omitted, the whole slice until
248        /// the end of the string is extracted. If negative, indexes from the
249        /// back.
250        #[default]
251        end: Option<i64>,
252        /// The number of bytes to extract. This is equivalent to passing
253        /// `start + count` as the `end` position. Mutually exclusive with `end`.
254        #[named]
255        count: Option<i64>,
256    ) -> StrResult<Str> {
257        let start = self.locate(start)?;
258        let end = end.or(count.map(|c| start as i64 + c));
259        let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
260        Ok(self.0[start..end].into())
261    }
262
263    /// Returns the grapheme clusters of the string as an array of substrings.
264    #[func]
265    pub fn clusters(&self) -> Array {
266        self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
267    }
268
269    /// Returns the Unicode codepoints of the string as an array of substrings.
270    #[func]
271    pub fn codepoints(&self) -> Array {
272        self.chars().map(|c| Value::Str(c.into())).collect()
273    }
274
275    /// Converts a character into its corresponding code point.
276    ///
277    /// ```example
278    /// #"a".to-unicode() \
279    /// #("a\u{0300}"
280    ///    .codepoints()
281    ///    .map(str.to-unicode))
282    /// ```
283    #[func]
284    pub fn to_unicode(
285        /// The character that should be converted.
286        character: char,
287    ) -> u32 {
288        character as u32
289    }
290
291    /// Converts a unicode code point into its corresponding string.
292    ///
293    /// ```example
294    /// #str.from-unicode(97)
295    /// ```
296    #[func]
297    pub fn from_unicode(
298        /// The code point that should be converted.
299        value: u32,
300    ) -> StrResult<Str> {
301        let c: char = value
302            .try_into()
303            .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
304        Ok(c.into())
305    }
306
307    /// Normalizes the string to the given Unicode normal form.
308    ///
309    /// This is useful when manipulating strings containing Unicode combining
310    /// characters.
311    ///
312    /// ```typ
313    /// #assert.eq("é".normalize(form: "nfd"), "e\u{0301}")
314    /// #assert.eq("ſ́".normalize(form: "nfkc"), "ś")
315    /// ```
316    #[func]
317    pub fn normalize(
318        &self,
319        #[named]
320        #[default(UnicodeNormalForm::Nfc)]
321        form: UnicodeNormalForm,
322    ) -> Str {
323        match form {
324            UnicodeNormalForm::Nfc => self.nfc().collect(),
325            UnicodeNormalForm::Nfd => self.nfd().collect(),
326            UnicodeNormalForm::Nfkc => self.nfkc().collect(),
327            UnicodeNormalForm::Nfkd => self.nfkd().collect(),
328        }
329    }
330
331    /// Whether the string contains the specified pattern.
332    ///
333    /// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
334    /// instead of `{"abcd".contains("bc")}`.
335    #[func]
336    pub fn contains(
337        &self,
338        /// The pattern to search for.
339        pattern: StrPattern,
340    ) -> bool {
341        match pattern {
342            StrPattern::Str(pat) => self.0.contains(pat.as_str()),
343            StrPattern::Regex(re) => re.is_match(self),
344        }
345    }
346
347    /// Whether the string starts with the specified pattern.
348    #[func]
349    pub fn starts_with(
350        &self,
351        /// The pattern the string might start with.
352        pattern: StrPattern,
353    ) -> bool {
354        match pattern {
355            StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
356            StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
357        }
358    }
359
360    /// Whether the string ends with the specified pattern.
361    #[func]
362    pub fn ends_with(
363        &self,
364        /// The pattern the string might end with.
365        pattern: StrPattern,
366    ) -> bool {
367        match pattern {
368            StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
369            StrPattern::Regex(re) => {
370                let mut start_byte = 0;
371                while let Some(mat) = re.find_at(self, start_byte) {
372                    if mat.end() == self.0.len() {
373                        return true;
374                    }
375
376                    // There might still be a match overlapping this one, so
377                    // restart at the next code point.
378                    let Some(c) = self[mat.start()..].chars().next() else { break };
379                    start_byte = mat.start() + c.len_utf8();
380                }
381                false
382            }
383        }
384    }
385
386    /// Searches for the specified pattern in the string and returns the first
387    /// match as a string or `{none}` if there is no match.
388    #[func]
389    pub fn find(
390        &self,
391        /// The pattern to search for.
392        pattern: StrPattern,
393    ) -> Option<Str> {
394        match pattern {
395            StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
396            StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
397        }
398    }
399
400    /// Searches for the specified pattern in the string and returns the index
401    /// of the first match as an integer or `{none}` if there is no match.
402    #[func]
403    pub fn position(
404        &self,
405        /// The pattern to search for.
406        pattern: StrPattern,
407    ) -> Option<usize> {
408        match pattern {
409            StrPattern::Str(pat) => self.0.find(pat.as_str()),
410            StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
411        }
412    }
413
414    /// Searches for the specified pattern in the string and returns a
415    /// dictionary with details about the first match or `{none}` if there is no
416    /// match.
417    ///
418    /// The returned dictionary has the following keys:
419    /// - `start`: The start offset of the match
420    /// - `end`: The end offset of the match
421    /// - `text`: The text that matched.
422    /// - `captures`: An array containing a string for each matched capturing
423    ///   group. The first item of the array contains the first matched
424    ///   capturing, not the whole match! This is empty unless the `pattern` was
425    ///   a regex with capturing groups.
426    ///
427    /// ```example:"Shape of the returned dictionary"
428    /// #let pat = regex("not (a|an) (apple|cat)")
429    /// #"I'm a doctor, not an apple.".match(pat) \
430    /// #"I am not a cat!".match(pat)
431    /// ```
432    ///
433    /// ```example:"Different kinds of patterns"
434    /// #assert.eq("Is there a".match("for this?"), none)
435    /// #"The time of my life.".match(regex("[mit]+e"))
436    /// ```
437    #[func]
438    pub fn match_(
439        &self,
440        /// The pattern to search for.
441        pattern: StrPattern,
442    ) -> Option<Dict> {
443        match pattern {
444            StrPattern::Str(pat) => {
445                self.0.match_indices(pat.as_str()).next().map(match_to_dict)
446            }
447            StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
448        }
449    }
450
451    /// Searches for the specified pattern in the string and returns an array of
452    /// dictionaries with details about all matches. For details about the
453    /// returned dictionaries, see [above]($str.match).
454    ///
455    /// ```example
456    /// #"Day by Day.".matches("Day")
457    /// ```
458    #[func]
459    pub fn matches(
460        &self,
461        /// The pattern to search for.
462        pattern: StrPattern,
463    ) -> Array {
464        match pattern {
465            StrPattern::Str(pat) => self
466                .0
467                .match_indices(pat.as_str())
468                .map(match_to_dict)
469                .map(Value::Dict)
470                .collect(),
471            StrPattern::Regex(re) => re
472                .captures_iter(self)
473                .map(captures_to_dict)
474                .map(Value::Dict)
475                .collect(),
476        }
477    }
478
479    /// Replace at most `count` occurrences of the given pattern with a
480    /// replacement string or function (beginning from the start). If no count
481    /// is given, all occurrences are replaced.
482    #[func]
483    pub fn replace(
484        &self,
485        engine: &mut Engine,
486        context: Tracked<Context>,
487        /// The pattern to search for.
488        pattern: StrPattern,
489        /// The string to replace the matches with or a function that gets a
490        /// dictionary for each match and can return individual replacement
491        /// strings.
492        ///
493        /// The dictionary passed to the function has the same shape as the
494        /// dictionary returned by [`match`]($str.match).
495        replacement: Replacement,
496        ///  If given, only the first `count` matches of the pattern are placed.
497        #[named]
498        count: Option<usize>,
499    ) -> SourceResult<Str> {
500        // Heuristic: Assume the new string is about the same length as
501        // the current string.
502        let mut output = EcoString::with_capacity(self.as_str().len());
503
504        // Replace one match of a pattern with the replacement.
505        let mut last_match = 0;
506        let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
507            // Push everything until the match.
508            output.push_str(&self[last_match..range.start]);
509            last_match = range.end;
510
511            // Determine and push the replacement.
512            match &replacement {
513                Replacement::Str(s) => output.push_str(s),
514                Replacement::Func(func) => {
515                    let piece = func
516                        .call(engine, context, [dict])?
517                        .cast::<Str>()
518                        .at(func.span())?;
519                    output.push_str(&piece);
520                }
521            }
522
523            Ok(())
524        };
525
526        // Iterate over the matches of the `pattern`.
527        let count = count.unwrap_or(usize::MAX);
528        match &pattern {
529            StrPattern::Str(pat) => {
530                for m in self.match_indices(pat.as_str()).take(count) {
531                    let (start, text) = m;
532                    handle_match(start..start + text.len(), match_to_dict(m))?;
533                }
534            }
535            StrPattern::Regex(re) => {
536                for caps in re.captures_iter(self).take(count) {
537                    // Extract the entire match over all capture groups.
538                    let m = caps.get(0).unwrap();
539                    handle_match(m.start()..m.end(), captures_to_dict(caps))?;
540                }
541            }
542        }
543
544        // Push the remainder.
545        output.push_str(&self[last_match..]);
546        Ok(output.into())
547    }
548
549    /// Removes matches of a pattern from one or both sides of the string, once or
550    /// repeatedly and returns the resulting string.
551    #[func]
552    pub fn trim(
553        &self,
554        /// The pattern to search for. If `{none}`, trims white spaces.
555        #[default]
556        pattern: Option<StrPattern>,
557        /// Can be `{start}` or `{end}` to only trim the start or end of the
558        /// string. If omitted, both sides are trimmed.
559        #[named]
560        at: Option<StrSide>,
561        /// Whether to repeatedly removes matches of the pattern or just once.
562        /// Defaults to `{true}`.
563        #[named]
564        #[default(true)]
565        repeat: bool,
566    ) -> Str {
567        let mut start = matches!(at, Some(StrSide::Start) | None);
568        let end = matches!(at, Some(StrSide::End) | None);
569
570        let trimmed = match pattern {
571            None => match at {
572                None => self.0.trim(),
573                Some(StrSide::Start) => self.0.trim_start(),
574                Some(StrSide::End) => self.0.trim_end(),
575            },
576            Some(StrPattern::Str(pat)) => {
577                let pat = pat.as_str();
578                let mut s = self.as_str();
579                if repeat {
580                    if start {
581                        s = s.trim_start_matches(pat);
582                    }
583                    if end {
584                        s = s.trim_end_matches(pat);
585                    }
586                } else {
587                    if start {
588                        s = s.strip_prefix(pat).unwrap_or(s);
589                    }
590                    if end {
591                        s = s.strip_suffix(pat).unwrap_or(s);
592                    }
593                }
594                s
595            }
596            Some(StrPattern::Regex(re)) => {
597                let s = self.as_str();
598                let mut last = None;
599                let mut range = 0..s.len();
600
601                for m in re.find_iter(s) {
602                    // Does this match follow directly after the last one?
603                    let consecutive = last == Some(m.start());
604
605                    // As long as we're at the beginning or in a consecutive run
606                    // of matches, and we're still trimming at the start, trim.
607                    start &= m.start() == 0 || consecutive;
608                    if start {
609                        range.start = m.end();
610                        start &= repeat;
611                    }
612
613                    // Reset end trim if we aren't consecutive anymore or aren't
614                    // repeating.
615                    if end && (!consecutive || !repeat) {
616                        range.end = m.start();
617                    }
618
619                    last = Some(m.end());
620                }
621
622                // Is the last match directly at the end?
623                if last.is_some_and(|last| last < s.len()) {
624                    range.end = s.len();
625                }
626
627                &s[range.start..range.start.max(range.end)]
628            }
629        };
630
631        trimmed.into()
632    }
633
634    /// Splits a string at matches of a specified pattern and returns an array
635    /// of the resulting parts.
636    ///
637    /// When the empty string is used as a separator, it separates every
638    /// character (i.e., Unicode code point) in the string, along with the
639    /// beginning and end of the string. In practice, this means that the
640    /// resulting list of parts will contain the empty string at the start
641    /// and end of the list.
642    #[func]
643    pub fn split(
644        &self,
645        /// The pattern to split at. Defaults to whitespace.
646        #[default]
647        pattern: Option<StrPattern>,
648    ) -> Array {
649        let s = self.as_str();
650        match pattern {
651            None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
652            Some(StrPattern::Str(pat)) => {
653                s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
654            }
655            Some(StrPattern::Regex(re)) => {
656                re.split(s).map(|v| Value::Str(v.into())).collect()
657            }
658        }
659    }
660
661    /// Reverse the string.
662    #[func(title = "Reverse")]
663    pub fn rev(&self) -> Str {
664        let mut s = EcoString::with_capacity(self.0.len());
665        for grapheme in self.as_str().graphemes(true).rev() {
666            s.push_str(grapheme);
667        }
668        s.into()
669    }
670}
671
672impl Deref for Str {
673    type Target = str;
674
675    fn deref(&self) -> &str {
676        &self.0
677    }
678}
679
680impl Debug for Str {
681    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
682        Debug::fmt(self.as_str(), f)
683    }
684}
685
686impl Display for Str {
687    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
688        Display::fmt(self.as_str(), f)
689    }
690}
691
692impl Repr for Str {
693    fn repr(&self) -> EcoString {
694        self.as_ref().repr()
695    }
696}
697
698impl Repr for EcoString {
699    fn repr(&self) -> EcoString {
700        self.as_ref().repr()
701    }
702}
703
704impl Repr for str {
705    fn repr(&self) -> EcoString {
706        let mut r = EcoString::with_capacity(self.len() + 2);
707        r.push('"');
708        for c in self.chars() {
709            match c {
710                '\0' => r.push_str(r"\u{0}"),
711                '\'' => r.push('\''),
712                '"' => r.push_str(r#"\""#),
713                _ => r.extend(c.escape_debug()),
714            }
715        }
716        r.push('"');
717        r
718    }
719}
720
721impl Repr for char {
722    fn repr(&self) -> EcoString {
723        EcoString::from(*self).repr()
724    }
725}
726
727impl Add for Str {
728    type Output = Self;
729
730    fn add(mut self, rhs: Self) -> Self::Output {
731        self += rhs;
732        self
733    }
734}
735
736impl AddAssign for Str {
737    fn add_assign(&mut self, rhs: Self) {
738        self.0.push_str(rhs.as_str());
739    }
740}
741
742impl AsRef<str> for Str {
743    fn as_ref(&self) -> &str {
744        self
745    }
746}
747
748impl Borrow<str> for Str {
749    fn borrow(&self) -> &str {
750        self
751    }
752}
753
754impl From<char> for Str {
755    fn from(c: char) -> Self {
756        Self(c.into())
757    }
758}
759
760impl From<&str> for Str {
761    fn from(s: &str) -> Self {
762        Self(s.into())
763    }
764}
765
766impl From<EcoString> for Str {
767    fn from(s: EcoString) -> Self {
768        Self(s)
769    }
770}
771
772impl From<String> for Str {
773    fn from(s: String) -> Self {
774        Self(s.into())
775    }
776}
777
778impl From<Cow<'_, str>> for Str {
779    fn from(s: Cow<str>) -> Self {
780        Self(s.into())
781    }
782}
783
784impl FromIterator<char> for Str {
785    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
786        Self(iter.into_iter().collect())
787    }
788}
789
790impl From<Str> for EcoString {
791    fn from(str: Str) -> Self {
792        str.0
793    }
794}
795
796impl From<Str> for String {
797    fn from(s: Str) -> Self {
798        s.0.into()
799    }
800}
801
802cast! {
803    char,
804    self => Value::Str(self.into()),
805    string: Str => {
806        let mut chars = string.chars();
807        match (chars.next(), chars.next()) {
808            (Some(c), None) => c,
809            _ => bail!("expected exactly one character"),
810        }
811    },
812}
813
814cast! {
815    &str,
816    self => Value::Str(self.into()),
817}
818
819cast! {
820    EcoString,
821    self => Value::Str(self.into()),
822    v: Str => v.into(),
823}
824
825cast! {
826    String,
827    self => Value::Str(self.into()),
828    v: Str => v.into(),
829}
830
831/// A value that can be cast to a string.
832pub enum ToStr {
833    /// A string value ready to be used as-is.
834    Str(Str),
835    /// An integer about to be formatted in a given base.
836    Int(i64),
837}
838
839cast! {
840    ToStr,
841    v: i64 => Self::Int(v),
842    v: f64 => Self::Str(repr::display_float(v).into()),
843    v: Decimal => Self::Str(format_str!("{}", v)),
844    v: Version => Self::Str(format_str!("{}", v)),
845    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?),
846    v: Label => Self::Str(v.resolve().as_str().into()),
847    v: Type => Self::Str(v.long_name().into()),
848    v: Str => Self::Str(v),
849}
850
851/// A Unicode normalization form.
852#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
853pub enum UnicodeNormalForm {
854    /// Canonical composition where e.g. accented letters are turned into a
855    /// single Unicode codepoint.
856    #[string("nfc")]
857    Nfc,
858    /// Canonical decomposition where e.g. accented letters are split into a
859    /// separate base and diacritic.
860    #[string("nfd")]
861    Nfd,
862    /// Like NFC, but using the Unicode compatibility decompositions.
863    #[string("nfkc")]
864    Nfkc,
865    /// Like NFD, but using the Unicode compatibility decompositions.
866    #[string("nfkd")]
867    Nfkd,
868}
869
870/// Convert an item of std's `match_indices` to a dictionary.
871fn match_to_dict((start, text): (usize, &str)) -> Dict {
872    dict! {
873        "start" => start,
874        "end" => start + text.len(),
875        "text" => text,
876        "captures" => Array::new(),
877    }
878}
879
880/// Convert regex captures to a dictionary.
881fn captures_to_dict(cap: regex::Captures) -> Dict {
882    let m = cap.get(0).expect("missing first match");
883    dict! {
884        "start" => m.start(),
885        "end" => m.end(),
886        "text" => m.as_str(),
887        "captures" =>  cap.iter()
888            .skip(1)
889            .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
890            .collect::<Array>(),
891    }
892}
893
894/// The out of bounds access error message.
895#[cold]
896fn out_of_bounds(index: i64, len: usize) -> EcoString {
897    eco_format!("string index out of bounds (index: {}, len: {})", index, len)
898}
899
900/// The out of bounds access error message when no default value was given.
901#[cold]
902fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
903    eco_format!(
904        "no default value was specified and string index out of bounds (index: {}, len: {})",
905        index,
906        len
907    )
908}
909
910/// The char boundary access error message.
911#[cold]
912fn not_a_char_boundary(index: i64) -> EcoString {
913    eco_format!("string index {} is not a character boundary", index)
914}
915
916/// The error message when the string is empty.
917#[cold]
918fn string_is_empty() -> EcoString {
919    "string is empty".into()
920}
921
922/// A regular expression.
923///
924/// Can be used as a [show rule selector]($styling/#show-rules) and with
925/// [string methods]($str) like `find`, `split`, and `replace`.
926///
927/// [See here](https://docs.rs/regex/latest/regex/#syntax) for a specification
928/// of the supported syntax.
929///
930/// # Example
931/// ```example
932/// // Works with string methods.
933/// #"a,b;c".split(regex("[,;]"))
934///
935/// // Works with show rules.
936/// #show regex("\d+"): set text(red)
937///
938/// The numbers 1 to 10.
939/// ```
940#[ty(scope)]
941#[derive(Debug, Clone)]
942pub struct Regex(regex::Regex);
943
944impl Regex {
945    /// Create a new regular expression.
946    pub fn new(re: &str) -> StrResult<Self> {
947        regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
948    }
949}
950
951#[scope]
952impl Regex {
953    /// Create a regular expression from a string.
954    #[func(constructor)]
955    pub fn construct(
956        /// The regular expression as a string.
957        ///
958        /// Most regex escape sequences just work because they are not valid Typst
959        /// escape sequences. To produce regex escape sequences that are also valid in
960        /// Typst (e.g. `[\\]`), you need to escape twice. Thus, to match a verbatim
961        /// backslash, you would need to write `{regex("\\\\")}`.
962        ///
963        /// If you need many escape sequences, you can also create a raw element
964        /// and extract its text to use it for your regular expressions:
965        /// ```{regex(`\d+\.\d+\.\d+`.text)}```.
966        regex: Spanned<Str>,
967    ) -> SourceResult<Regex> {
968        Self::new(&regex.v).at(regex.span)
969    }
970}
971
972impl Deref for Regex {
973    type Target = regex::Regex;
974
975    fn deref(&self) -> &Self::Target {
976        &self.0
977    }
978}
979
980impl Repr for Regex {
981    fn repr(&self) -> EcoString {
982        eco_format!("regex({})", self.0.as_str().repr())
983    }
984}
985
986impl PartialEq for Regex {
987    fn eq(&self, other: &Self) -> bool {
988        self.0.as_str() == other.0.as_str()
989    }
990}
991
992impl Hash for Regex {
993    fn hash<H: Hasher>(&self, state: &mut H) {
994        self.0.as_str().hash(state);
995    }
996}
997
998/// A pattern which can be searched for in a string.
999#[derive(Debug, Clone)]
1000pub enum StrPattern {
1001    /// Just a string.
1002    Str(Str),
1003    /// A regular expression.
1004    Regex(Regex),
1005}
1006
1007cast! {
1008    StrPattern,
1009    self => match self {
1010        Self::Str(v) => v.into_value(),
1011        Self::Regex(v) => v.into_value(),
1012    },
1013    v: Str => Self::Str(v),
1014    v: Regex => Self::Regex(v),
1015}
1016
1017/// A side of a string.
1018#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
1019pub enum StrSide {
1020    /// The logical start of the string, may be left or right depending on the
1021    /// language.
1022    Start,
1023    /// The logical end of the string.
1024    End,
1025}
1026
1027cast! {
1028    StrSide,
1029    v: Alignment => match v {
1030        Alignment::START => Self::Start,
1031        Alignment::END => Self::End,
1032        _ => bail!("expected either `start` or `end`"),
1033    },
1034}
1035
1036/// A replacement for a matched [`Str`]
1037pub enum Replacement {
1038    /// A string a match is replaced with.
1039    Str(Str),
1040    /// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
1041    /// whose output is inserted for the match.
1042    Func(Func),
1043}
1044
1045cast! {
1046    Replacement,
1047    self => match self {
1048        Self::Str(v) => v.into_value(),
1049        Self::Func(v) => v.into_value(),
1050    },
1051    v: Str => Self::Str(v),
1052    v: Func => Self::Func(v)
1053}