typst_library/foundations/str.rs
1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::Spanned;
10use unicode_normalization::UnicodeNormalization;
11use unicode_segmentation::UnicodeSegmentation;
12
13use crate::diag::{At, SourceResult, StrResult, bail};
14use crate::engine::Engine;
15use crate::foundations::{
16 Array, Bytes, Cast, Context, Decimal, Dict, Func, IntoValue, Label, Repr, Type,
17 Value, Version, cast, dict, func, repr, scope, ty,
18};
19use crate::layout::Alignment;
20
/// Build a [`Str`] from `format!`-style arguments.
///
/// All tokens are forwarded unchanged to `eco_format!`; the resulting string
/// is then converted into a [`Str`].
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($args:tt)*) => {{
        let formatted = $crate::foundations::eco_format!($($args)*);
        $crate::foundations::Str::from(formatted)
    }};
}
29
30#[doc(hidden)]
31pub use ecow::eco_format;
32
33#[doc(inline)]
34pub use crate::__format_str as format_str;
35
/// A sequence of Unicode codepoints.
///
/// You can iterate over the grapheme clusters of the string using a
/// @reference:scripting:loops[for loop]. Grapheme clusters are basically
/// characters but keep together things that belong together, e.g. multiple
/// codepoints that together form a flag emoji. Strings can be added with the
/// `+` operator, @reference:scripting:blocks[joined together] and multiplied
/// with integers.
///
/// Typst provides utility methods for string manipulation. Many of these
/// methods (e.g., @str.split[`split`], @str.trim[`trim`] and
/// @str.replace[`replace`]) operate on _patterns:_ A pattern can be either a
/// string or a @regex[regular expression]. This makes the methods quite
/// versatile.
///
/// All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
/// zero-based and negative indices wrap around to the end of the string.
///
/// You can convert a value to a string with the `str` constructor.
///
/// = Example <example>
/// ```example
/// #"hello world!" \
/// #"\"hello\n world\"!" \
/// #"1 2 3".split() \
/// #"1,2;3".split(regex("[,;]")) \
/// #(regex("\\d+") in "ten euros") \
/// #(regex("\\d+") in "10 euros")
/// ```
///
/// = #short-or-long[Escapes][Escape sequences] <escapes>
/// Just like in markup, you can escape a few symbols in strings:
/// - `[\\]` for a backslash
/// - `[\"]` for a quote
/// - `[\n]` for a newline
/// - `[\r]` for a carriage return
/// - `[\t]` for a tab
/// - `[\u{1f600}]` for a hexadecimal Unicode escape sequence
#[ty(scope, cast, title = "String")]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
// Newtype around `EcoString`. Comparison, ordering and hashing are derived
// from the wrapped string, and `#[serde(transparent)]` makes serialization
// delegate to the inner field directly.
pub struct Str(EcoString);
79
80impl Str {
81 /// Create a new, empty string.
82 pub fn new() -> Self {
83 Self(EcoString::new())
84 }
85
86 /// Return `true` if the length is 0.
87 pub fn is_empty(&self) -> bool {
88 self.0.is_empty()
89 }
90
91 /// Repeat the string a number of times.
92 pub fn repeat(&self, n: usize) -> StrResult<Self> {
93 if self.0.len().checked_mul(n).is_none() {
94 return Err(eco_format!("cannot repeat this string {n} times"));
95 }
96 Ok(Self(self.0.repeat(n)))
97 }
98
99 /// A string slice containing the entire string.
100 pub fn as_str(&self) -> &str {
101 self
102 }
103
104 /// Resolve an index or throw an out of bounds error.
105 fn locate(&self, index: i64) -> StrResult<usize> {
106 self.locate_opt(index)?
107 .ok_or_else(|| out_of_bounds(index, self.len()))
108 }
109
110 /// Resolve an index, if it is within bounds and on a valid char boundary.
111 ///
112 /// `index == len` is considered in bounds.
113 fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
114 let wrapped =
115 if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
116
117 let resolved = wrapped
118 .and_then(|v| usize::try_from(v).ok())
119 .filter(|&v| v <= self.0.len());
120
121 if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
122 return Err(not_a_char_boundary(index));
123 }
124
125 Ok(resolved)
126 }
127}
128
129#[scope]
130impl Str {
131 /// Converts a value to a string.
132 ///
133 /// - Integers are formatted in base 10. This can be overridden with the
134 /// optional `base` parameter.
135 /// - Floats are formatted in base 10 and never in exponential notation.
136 /// - Negative integers and floats are formatted with the Unicode minus sign
137 /// ("−" U+2212) instead of the ASCII minus sign ("-" U+002D).
138 /// - From labels the name is extracted.
139 /// - Bytes are decoded as UTF-8.
140 ///
141 /// If you wish to convert from and to Unicode code points, see the
142 /// @str.to-unicode[`to-unicode`] and @str.from-unicode[`from-unicode`]
143 /// functions.
144 ///
145 /// ```example
146 /// #str(10) \
147 /// #str(4000, base: 16) \
148 /// #str(2.7) \
149 /// #str(1e8) \
150 /// #str(<intro>)
151 /// ```
152 #[func(constructor)]
153 pub fn construct(
154 /// The value that should be converted to a string.
155 value: ToStr,
156 /// The base (radix) to display integers in, between 2 and 36.
157 #[named]
158 #[default(Spanned::detached(Base::Default))]
159 base: Spanned<Base>,
160 ) -> SourceResult<Str> {
161 Ok(match value {
162 ToStr::Str(s) => {
163 if matches!(base.v, Base::User(_)) {
164 bail!(base.span, "base is only supported for integers");
165 }
166 s
167 }
168 ToStr::Int(n) => {
169 let b = base.v.value();
170 if b == 1 && n > 0 {
171 bail!(
172 base.span, "base must be between 2 and 36";
173 hint: "generate a unary representation with `\"1\" * {n}`";
174 );
175 }
176 if b < 2 || b > 36 {
177 bail!(base.span, "base must be between 2 and 36");
178 }
179 repr::format_int_with_base(n, b).into()
180 }
181 })
182 }
183
184 /// The length of the string in UTF-8 encoded bytes.
185 #[func(title = "Length")]
186 pub fn len(&self) -> usize {
187 self.0.len()
188 }
189
190 /// Extracts the first grapheme cluster of the string.
191 ///
192 /// Returns the provided default value if the string is empty or fails with
193 /// an error if no default value was specified.
194 #[func]
195 pub fn first(
196 &self,
197 /// A default value to return if the string is empty.
198 #[named]
199 default: Option<Str>,
200 ) -> StrResult<Str> {
201 self.0
202 .graphemes(true)
203 .next()
204 .map(Into::into)
205 .or(default)
206 .ok_or_else(string_is_empty)
207 }
208
209 /// Extracts the last grapheme cluster of the string.
210 ///
211 /// Returns the provided default value if the string is empty or fails with
212 /// an error if no default value was specified.
213 #[func]
214 pub fn last(
215 &self,
216 /// A default value to return if the string is empty.
217 #[named]
218 default: Option<Str>,
219 ) -> StrResult<Str> {
220 self.0
221 .graphemes(true)
222 .next_back()
223 .map(Into::into)
224 .or(default)
225 .ok_or_else(string_is_empty)
226 }
227
228 /// Extracts the first grapheme cluster after the specified index. Returns
229 /// the default value if the index is out of bounds or fails with an error
230 /// if no default value was specified.
231 #[func]
232 pub fn at(
233 &self,
234 /// The byte index. If negative, indexes from the back.
235 index: i64,
236 /// A default value to return if the index is out of bounds.
237 #[named]
238 default: Option<Value>,
239 ) -> StrResult<Value> {
240 let len = self.len();
241 self.locate_opt(index)?
242 .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
243 .or(default)
244 .ok_or_else(|| no_default_and_out_of_bounds(index, len))
245 }
246
247 /// Extracts a substring of the string. Fails with an error if the start or
248 /// end index is out of bounds.
249 #[func]
250 pub fn slice(
251 &self,
252 /// The start byte index (inclusive). If negative, indexes from the
253 /// back.
254 start: i64,
255 /// The end byte index (exclusive). If omitted, the whole slice until
256 /// the end of the string is extracted. If negative, indexes from the
257 /// back.
258 #[default]
259 end: Option<i64>,
260 /// The number of bytes to extract. This is equivalent to passing
261 /// `start + count` as the `end` position. Mutually exclusive with
262 /// `end`.
263 #[named]
264 count: Option<i64>,
265 ) -> StrResult<Str> {
266 if end.is_some() && count.is_some() {
267 bail!("`end` and `count` are mutually exclusive");
268 }
269 let start = self.locate(start)?;
270 let end = end.or(count.map(|c| start as i64 + c));
271 let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
272 Ok(self.0[start..end].into())
273 }
274
275 /// Returns the grapheme clusters of the string as an array of substrings.
276 #[func]
277 pub fn clusters(&self) -> Array {
278 self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
279 }
280
281 /// Returns the Unicode codepoints of the string as an array of substrings.
282 #[func]
283 pub fn codepoints(&self) -> Array {
284 self.chars().map(|c| Value::Str(c.into())).collect()
285 }
286
287 /// Converts a character into its corresponding code point.
288 ///
289 /// ```example
290 /// #"a".to-unicode() \
291 /// #("a\u{0300}"
292 /// .codepoints()
293 /// .map(str.to-unicode))
294 /// ```
295 #[func]
296 pub fn to_unicode(
297 /// The character that should be converted.
298 character: char,
299 ) -> u32 {
300 character as u32
301 }
302
303 /// Converts a unicode code point into its corresponding string.
304 ///
305 /// ```example
306 /// #str.from-unicode(97)
307 /// ```
308 #[func]
309 pub fn from_unicode(
310 /// The code point that should be converted.
311 value: u32,
312 ) -> StrResult<Str> {
313 let c: char = value
314 .try_into()
315 .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
316 Ok(c.into())
317 }
318
319 /// Normalizes the string to the given Unicode normal form.
320 ///
321 /// This is useful when manipulating strings containing Unicode combining
322 /// characters.
323 ///
324 /// ```typ
325 /// #assert.eq("é".normalize(form: "nfd"), "e\u{0301}")
326 /// #assert.eq("ſ́".normalize(form: "nfkc"), "ś")
327 /// ```
328 #[func]
329 pub fn normalize(
330 &self,
331 #[named]
332 #[default(UnicodeNormalForm::Nfc)]
333 form: UnicodeNormalForm,
334 ) -> Str {
335 match form {
336 UnicodeNormalForm::Nfc => self.nfc().collect(),
337 UnicodeNormalForm::Nfd => self.nfd().collect(),
338 UnicodeNormalForm::Nfkc => self.nfkc().collect(),
339 UnicodeNormalForm::Nfkd => self.nfkd().collect(),
340 }
341 }
342
343 /// Whether the string contains the specified pattern.
344 ///
345 /// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
346 /// instead of `{"abcd".contains("bc")}`.
347 #[func]
348 pub fn contains(
349 &self,
350 /// The pattern to search for.
351 pattern: StrPattern,
352 ) -> bool {
353 match pattern {
354 StrPattern::Str(pat) => self.0.contains(pat.as_str()),
355 StrPattern::Regex(re) => re.is_match(self),
356 }
357 }
358
359 /// Whether the string starts with the specified pattern.
360 #[func]
361 pub fn starts_with(
362 &self,
363 /// The pattern the string might start with.
364 pattern: StrPattern,
365 ) -> bool {
366 match pattern {
367 StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
368 StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
369 }
370 }
371
    /// Whether the string ends with the specified pattern.
    #[func]
    pub fn ends_with(
        &self,
        /// The pattern the string might end with.
        pattern: StrPattern,
    ) -> bool {
        match pattern {
            StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
            StrPattern::Regex(re) => {
                // Byte offset from which the next regex search starts.
                let mut start_byte = 0;
                while let Some(mat) = re.find_at(self, start_byte) {
                    // Success as soon as some match stops exactly at the end
                    // of the string.
                    if mat.end() == self.0.len() {
                        return true;
                    }

                    // There might still be a match overlapping this one, so
                    // restart at the next code point.
                    // If no char follows the match start, there is nothing
                    // left to search and the loop ends.
                    let Some(c) = self[mat.start()..].chars().next() else { break };
                    start_byte = mat.start() + c.len_utf8();
                }
                false
            }
        }
    }
397
398 /// Searches for the specified pattern in the string and returns the first
399 /// match as a string or `{none}` if there is no match.
400 #[func]
401 pub fn find(
402 &self,
403 /// The pattern to search for.
404 pattern: StrPattern,
405 ) -> Option<Str> {
406 match pattern {
407 StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
408 StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
409 }
410 }
411
412 /// Searches for the specified pattern in the string and returns the index
413 /// of the first match as an integer or `{none}` if there is no match.
414 #[func]
415 pub fn position(
416 &self,
417 /// The pattern to search for.
418 pattern: StrPattern,
419 ) -> Option<usize> {
420 match pattern {
421 StrPattern::Str(pat) => self.0.find(pat.as_str()),
422 StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
423 }
424 }
425
426 /// Searches for the specified pattern in the string and returns a
427 /// dictionary with details about the first match or `{none}` if there is no
428 /// match.
429 ///
430 /// The returned dictionary has the following keys:
431 /// - `start`: The start offset of the match
432 /// - `end`: The end offset of the match
433 /// - `text`: The text that matched.
434 /// - `captures`: An array containing a string for each matched capturing
435 /// group. The first item of the array contains the first matched
436 /// capturing, not the whole match! This is empty unless the `pattern` was
437 /// a regex with capturing groups.
438 ///
439 /// #example(
440 /// title: "Shape of the returned dictionary",
441 /// ```
442 /// #let pat = regex("not (a|an) (apple|cat)")
443 /// #"I'm a doctor, not an apple.".match(pat) \
444 /// #"I am not a cat!".match(pat)
445 /// ```
446 /// )
447 ///
448 /// #example(
449 /// title: "Different kinds of patterns",
450 /// ```
451 /// #assert.eq("Is there a".match("for this?"), none)
452 /// #"The time of my life.".match(regex("[mit]+e"))
453 /// ```
454 /// )
455 #[func]
456 pub fn match_(
457 &self,
458 /// The pattern to search for.
459 pattern: StrPattern,
460 ) -> Option<Dict> {
461 match pattern {
462 StrPattern::Str(pat) => {
463 self.0.match_indices(pat.as_str()).next().map(match_to_dict)
464 }
465 StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
466 }
467 }
468
469 /// Searches for the specified pattern in the string and returns an array of
470 /// dictionaries with details about all matches. For details about the
471 /// returned dictionaries, see @str.match[above].
472 ///
473 /// ```example
474 /// #"Day by Day.".matches("Day")
475 /// ```
476 #[func]
477 pub fn matches(
478 &self,
479 /// The pattern to search for.
480 pattern: StrPattern,
481 ) -> Array {
482 match pattern {
483 StrPattern::Str(pat) => self
484 .0
485 .match_indices(pat.as_str())
486 .map(match_to_dict)
487 .map(Value::Dict)
488 .collect(),
489 StrPattern::Regex(re) => re
490 .captures_iter(self)
491 .map(captures_to_dict)
492 .map(Value::Dict)
493 .collect(),
494 }
495 }
496
    /// Replace at most `count` occurrences of the given pattern with a
    /// replacement string or function (beginning from the start). If no count
    /// is given, all occurrences are replaced.
    #[func]
    pub fn replace(
        &self,
        engine: &mut Engine,
        context: Tracked<Context>,
        /// The pattern to search for.
        pattern: StrPattern,
        /// The string to replace the matches with or a function that gets a
        /// dictionary for each match and can return individual replacement
        /// strings.
        ///
        /// The dictionary passed to the function has the same shape as the
        /// dictionary returned by @str.match[`match`].
        replacement: Replacement,
        /// If given, only the first `count` matches of the pattern are
        /// replaced.
        #[named]
        count: Option<usize>,
    ) -> SourceResult<Str> {
        // Heuristic: Assume the new string is about the same length as
        // the current string.
        let mut output = EcoString::with_capacity(self.as_str().len());

        // Replace one match of a pattern with the replacement.
        // `last_match` is the byte offset right after the previously handled
        // match, i.e. where the not yet copied part of the input begins.
        let mut last_match = 0;
        let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
            // Push everything until the match.
            output.push_str(&self[last_match..range.start]);
            last_match = range.end;

            // Determine and push the replacement.
            match &replacement {
                Replacement::Str(s) => output.push_str(s),
                Replacement::Func(func) => {
                    // Errors raised by the function propagate; a return
                    // value that cannot be cast to a string is reported at
                    // the function's span.
                    let piece = func
                        .call(engine, context, [dict])?
                        .cast::<Str>()
                        .at(func.span())?;
                    output.push_str(&piece);
                }
            }

            Ok(())
        };

        // Iterate over the matches of the `pattern`.
        // Without an explicit `count`, the limit is `usize::MAX`.
        let count = count.unwrap_or(usize::MAX);
        match &pattern {
            StrPattern::Str(pat) => {
                for m in self.match_indices(pat.as_str()).take(count) {
                    let (start, text) = m;
                    handle_match(start..start + text.len(), match_to_dict(m))?;
                }
            }
            StrPattern::Regex(re) => {
                for caps in re.captures_iter(self).take(count) {
                    // Extract the entire match over all capture groups.
                    let m = caps.get(0).unwrap();
                    handle_match(m.start()..m.end(), captures_to_dict(caps))?;
                }
            }
        }

        // Push the remainder.
        output.push_str(&self[last_match..]);
        Ok(output.into())
    }
567
    /// Removes matches of a pattern from one or both sides of the string, once
    /// or repeatedly and returns the resulting string.
    #[func]
    pub fn trim(
        &self,
        /// The pattern to search for. If `{none}`, trims white spaces.
        #[default]
        pattern: Option<StrPattern>,
        /// Can be `{start}` or `{end}` to only trim the start or end of the
        /// string. If omitted, both sides are trimmed.
        #[named]
        at: Option<StrSide>,
        /// Whether to repeatedly remove matches of the pattern or just once.
        /// Defaults to `{true}`.
        #[named]
        #[default(true)]
        repeat: bool,
    ) -> Str {
        // Which sides are to be trimmed; an omitted `at` enables both.
        let mut start = matches!(at, Some(StrSide::Start) | None);
        let end = matches!(at, Some(StrSide::End) | None);

        let trimmed = match pattern {
            // Whitespace trimming does not consult `repeat`.
            None => match at {
                None => self.0.trim(),
                Some(StrSide::Start) => self.0.trim_start(),
                Some(StrSide::End) => self.0.trim_end(),
            },
            Some(StrPattern::Str(pat)) => {
                let pat = pat.as_str();
                let mut s = self.as_str();
                if repeat {
                    if start {
                        s = s.trim_start_matches(pat);
                    }
                    if end {
                        s = s.trim_end_matches(pat);
                    }
                } else {
                    // Remove at most one occurrence per side.
                    if start {
                        s = s.strip_prefix(pat).unwrap_or(s);
                    }
                    if end {
                        s = s.strip_suffix(pat).unwrap_or(s);
                    }
                }
                s
            }
            Some(StrPattern::Regex(re)) => {
                let s = self.as_str();
                // End offset of the previously visited match, if any.
                let mut last = None;
                // Byte range of `s` that is kept; narrowed while scanning.
                let mut range = 0..s.len();

                for m in re.find_iter(s) {
                    // Does this match follow directly after the last one?
                    let consecutive = last == Some(m.start());

                    // As long as we're at the beginning or in a consecutive run
                    // of matches, and we're still trimming at the start, trim.
                    start &= m.start() == 0 || consecutive;
                    if start {
                        range.start = m.end();
                        // Without `repeat`, only the first match is trimmed.
                        start &= repeat;
                    }

                    // Reset end trim if we aren't consecutive anymore or aren't
                    // repeating.
                    if end && (!consecutive || !repeat) {
                        range.end = m.start();
                    }

                    last = Some(m.end());
                }

                // Is the last match directly at the end?
                // If it is not, nothing is trimmed from the end.
                if last.is_some_and(|last| last < s.len()) {
                    range.end = s.len();
                }

                // The `max` keeps the slice valid when the computed end lies
                // before the computed start.
                &s[range.start..range.start.max(range.end)]
            }
        };

        trimmed.into()
    }
652
653 /// Splits a string at matches of a specified pattern and returns an array
654 /// of the resulting parts.
655 ///
656 /// When the empty string is used as a separator, it separates every
657 /// character (i.e., Unicode code point) in the string, along with the
658 /// beginning and end of the string. In practice, this means that the
659 /// resulting list of parts will contain the empty string at the start and
660 /// end of the list.
661 #[func]
662 pub fn split(
663 &self,
664 /// The pattern to split at. Defaults to whitespace.
665 #[default]
666 pattern: Option<StrPattern>,
667 ) -> Array {
668 let s = self.as_str();
669 match pattern {
670 None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
671 Some(StrPattern::Str(pat)) => {
672 s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
673 }
674 Some(StrPattern::Regex(re)) => {
675 re.split(s).map(|v| Value::Str(v.into())).collect()
676 }
677 }
678 }
679
680 /// Reverses the string.
681 ///
682 /// More specifically, this returns a string with the same grapheme
683 /// clusters, in reversed order.
684 ///
685 /// ```example
686 /// #"Pirate flag: 🏴☠️".rev()
687 /// ```
688 #[func(title = "Reverse")]
689 pub fn rev(&self) -> Str {
690 let mut s = EcoString::with_capacity(self.0.len());
691 for grapheme in self.as_str().graphemes(true).rev() {
692 s.push_str(grapheme);
693 }
694 s.into()
695 }
696}
697
698impl Deref for Str {
699 type Target = str;
700
701 fn deref(&self) -> &str {
702 &self.0
703 }
704}
705
706impl Debug for Str {
707 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
708 Debug::fmt(self.as_str(), f)
709 }
710}
711
712impl Display for Str {
713 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
714 Display::fmt(self.as_str(), f)
715 }
716}
717
718impl Repr for Str {
719 fn repr(&self) -> EcoString {
720 self.as_str().repr()
721 }
722}
723
724impl Repr for EcoString {
725 fn repr(&self) -> EcoString {
726 self.as_str().repr()
727 }
728}
729
730impl Repr for str {
731 fn repr(&self) -> EcoString {
732 let mut r = EcoString::with_capacity(self.len() + 2);
733 r.push('"');
734 for c in self.chars() {
735 match c {
736 '\0' => r.push_str(r"\u{0}"),
737 '\'' => r.push('\''),
738 '"' => r.push_str(r#"\""#),
739 _ => r.extend(c.escape_debug()),
740 }
741 }
742 r.push('"');
743 r
744 }
745}
746
747impl Repr for char {
748 fn repr(&self) -> EcoString {
749 EcoString::from(*self).repr()
750 }
751}
752
753impl Add for Str {
754 type Output = Self;
755
756 fn add(mut self, rhs: Self) -> Self::Output {
757 self += rhs;
758 self
759 }
760}
761
762impl AddAssign for Str {
763 fn add_assign(&mut self, rhs: Self) {
764 self.0.push_str(rhs.as_str());
765 }
766}
767
768impl AsRef<str> for Str {
769 fn as_ref(&self) -> &str {
770 self
771 }
772}
773
774impl Borrow<str> for Str {
775 fn borrow(&self) -> &str {
776 self
777 }
778}
779
780impl From<char> for Str {
781 fn from(c: char) -> Self {
782 Self(c.into())
783 }
784}
785
786impl From<&str> for Str {
787 fn from(s: &str) -> Self {
788 Self(s.into())
789 }
790}
791
792impl From<EcoString> for Str {
793 fn from(s: EcoString) -> Self {
794 Self(s)
795 }
796}
797
798impl From<String> for Str {
799 fn from(s: String) -> Self {
800 Self(s.into())
801 }
802}
803
804impl From<Cow<'_, str>> for Str {
805 fn from(s: Cow<str>) -> Self {
806 Self(s.into())
807 }
808}
809
810impl FromIterator<char> for Str {
811 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
812 Self(iter.into_iter().collect())
813 }
814}
815
816impl From<Str> for EcoString {
817 fn from(str: Str) -> Self {
818 str.0
819 }
820}
821
822impl From<Str> for String {
823 fn from(s: Str) -> Self {
824 s.0.into()
825 }
826}
827
// `char` becomes a string value; in the other direction, a string is only
// accepted when it consists of exactly one `char` (not one grapheme cluster).
cast! {
    char,
    self => Value::Str(self.into()),
    string: Str => {
        let mut chars = string.chars();
        match (chars.next(), chars.next()) {
            (Some(c), None) => c,
            _ => bail!("expected exactly one character"),
        }
    },
}

// `&str` is only converted into a value; no conversion from a value back to
// `&str` is declared here.
cast! {
    &str,
    self => Value::Str(self.into()),
}

// `EcoString` converts both ways via `Str`.
cast! {
    EcoString,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}

// `String` converts both ways via `Str`.
cast! {
    String,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
856
/// A value that can be cast to a string.
///
/// Integers are kept as numbers so that a base can still be applied when
/// formatting them; every other accepted input is stringified during the
/// cast itself.
pub enum ToStr {
    /// A string value ready to be used as-is.
    Str(Str),
    /// An integer about to be formatted in a given base.
    Int(i64),
}

cast! {
    ToStr,
    // Kept numeric; formatting is left to the consumer.
    v: i64 => Self::Int(v),
    v: f64 => Self::Str(repr::display_float(v).into()),
    v: Decimal => Self::Str(format_str!("{}", v)),
    v: Version => Self::Str(format_str!("{}", v)),
    // Fails with an error if the bytes are not valid UTF-8.
    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid UTF-8")?),
    v: Label => Self::Str(v.resolve().as_str().into()),
    v: Type => Self::Str(v.long_name().into()),
    v: Str => Self::Str(v),
}
876
/// Similar to `Option<i64>`, but the default value casts to `10` rather than
/// `none`, so that the right default value is documented.
#[derive(Debug, Copy, Clone)]
pub enum Base {
    /// No base was given explicitly; stands for base 10.
    Default,
    /// A base that was explicitly provided.
    User(i64),
}

impl Base {
    /// The numeric base: the explicitly given one, or 10 by default.
    pub fn value(self) -> i64 {
        if let Self::User(base) = self {
            base
        } else {
            10
        }
    }
}
893
// A `Base` turns into its numeric value (so the default shows up as `10`),
// and any integer read from a value is treated as explicitly user-provided.
cast! {
    Base,
    self => self.value().into_value(),
    v: i64 => Self::User(v),
}
899
/// A Unicode normalization form.
// Each variant carries a `#[string(..)]` attribute with its lowercase name,
// matching the strings used in the `normalize` examples (e.g. `form: "nfd"`).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum UnicodeNormalForm {
    /// Canonical composition where e.g. accented letters are turned into a
    /// single Unicode codepoint.
    #[string("nfc")]
    Nfc,
    /// Canonical decomposition where e.g. accented letters are split into a
    /// separate base and diacritic.
    #[string("nfd")]
    Nfd,
    /// Like NFC, but using the Unicode compatibility decompositions.
    #[string("nfkc")]
    Nfkc,
    /// Like NFD, but using the Unicode compatibility decompositions.
    #[string("nfkd")]
    Nfkd,
}
918
919/// Convert an item of std's `match_indices` to a dictionary.
920fn match_to_dict((start, text): (usize, &str)) -> Dict {
921 dict! {
922 "start" => start,
923 "end" => start + text.len(),
924 "text" => text,
925 "captures" => Array::new(),
926 }
927}
928
929/// Convert regex captures to a dictionary.
930fn captures_to_dict(cap: regex::Captures) -> Dict {
931 let m = cap.get(0).expect("missing first match");
932 dict! {
933 "start" => m.start(),
934 "end" => m.end(),
935 "text" => m.as_str(),
936 "captures" => cap.iter()
937 .skip(1)
938 .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
939 .collect::<Array>(),
940 }
941}
942
/// The out of bounds access error message.
///
/// `index` is the index as given by the caller (possibly negative) and `len`
/// the string length in bytes; both are embedded verbatim in the message.
#[cold]
fn out_of_bounds(index: i64, len: usize) -> EcoString {
    eco_format!("string index out of bounds (index: {index}, len: {len})")
}
948
/// The out of bounds access error message when no default value was given.
///
/// `index` and `len` are embedded verbatim in the message.
#[cold]
fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
    eco_format!(
        "no default value was specified and string index out of bounds \
         (index: {index}, len: {len})"
    )
}
957
/// The char boundary access error message.
///
/// `index` is reported exactly as passed in.
#[cold]
fn not_a_char_boundary(index: i64) -> EcoString {
    eco_format!("string index {index} is not a character boundary")
}
963
964/// The error message when the string is empty.
965#[cold]
966fn string_is_empty() -> EcoString {
967 "string is empty".into()
968}
969
/// A regular expression.
///
/// Can be used as a @reference:styling:show-rules[show rule selector] and with
/// @str[string methods] like `find`, `split`, `replace`, and `match`.
///
/// #link("https://docs.rs/regex/latest/regex/#syntax")[See here] for a
/// specification of the supported syntax.
///
/// = Example <example>
/// ```example
/// // Works with string methods.
/// #"a,b;c".split(regex("[,;]"))
///
/// // Works with show rules.
/// #show regex("\\d+"): set text(red)
///
/// The numbers 1 to 10.
/// ```
#[ty(scope)]
#[derive(Debug, Clone)]
// Newtype around the `regex` crate's compiled `Regex`.
pub struct Regex(regex::Regex);
991
992impl Regex {
993 /// Create a new regular expression.
994 pub fn new(re: &str) -> StrResult<Self> {
995 regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
996 }
997}
998
999#[scope]
1000impl Regex {
1001 /// Create a regular expression from a string.
1002 #[func(constructor)]
1003 pub fn construct(
1004 /// The regular expression as a string.
1005 ///
1006 /// Both Typst strings and regular expressions use backslashes for
1007 /// escaping. To produce a regex escape sequence that is also valid in
1008 /// Typst, you need to escape the backslash itself (e.g., writing
1009 /// `{regex("\\\\")}` for the regex `\\`). Regex escape sequences that
1010 /// are not valid Typst escape sequences (e.g., `\d` and `\b`) can be
1011 /// entered into strings directly, but it's good practice to still
1012 /// escape them to avoid ambiguity (i.e., `{regex("\\b\\d")}`). See the
1013 /// @str:escapes[list of valid string escape sequences].
1014 ///
1015 /// If you need many escape sequences, you can also create a raw element
1016 /// and extract its text to use it for your regular expressions:
1017 /// ``` {regex(`\d+\.\d+\.\d+`.text)}```.
1018 regex: Spanned<Str>,
1019 ) -> SourceResult<Regex> {
1020 Self::new(®ex.v).at(regex.span)
1021 }
1022}
1023
1024impl Deref for Regex {
1025 type Target = regex::Regex;
1026
1027 fn deref(&self) -> &Self::Target {
1028 &self.0
1029 }
1030}
1031
1032impl Repr for Regex {
1033 fn repr(&self) -> EcoString {
1034 eco_format!("regex({})", self.0.as_str().repr())
1035 }
1036}
1037
1038impl PartialEq for Regex {
1039 fn eq(&self, other: &Self) -> bool {
1040 self.0.as_str() == other.0.as_str()
1041 }
1042}
1043
1044impl Hash for Regex {
1045 fn hash<H: Hasher>(&self, state: &mut H) {
1046 self.0.as_str().hash(state);
1047 }
1048}
1049
/// A pattern which can be searched for in a string.
#[derive(Debug, Clone)]
pub enum StrPattern {
    /// Just a string.
    Str(Str),
    /// A regular expression.
    Regex(Regex),
}

// Accepts either a string or a regex value and converts back to whichever
// kind of value the variant holds.
cast! {
    StrPattern,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Regex(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Regex => Self::Regex(v),
}
1068
/// A side of a string.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum StrSide {
    /// The logical start of the string, may be left or right depending on the
    /// language.
    Start,
    /// The logical end of the string.
    End,
}

// A side is given as an alignment value; only `start` and `end` are accepted,
// any other alignment is rejected with an error.
cast! {
    StrSide,
    v: Alignment => match v {
        Alignment::START => Self::Start,
        Alignment::END => Self::End,
        _ => bail!("expected either `start` or `end`"),
    },
}
1087
/// A replacement for a matched [`Str`].
pub enum Replacement {
    /// A string a match is replaced with.
    Str(Str),
    /// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
    /// whose output is inserted for the match.
    Func(Func),
}

// Accepts either a string or a function value and converts back to whichever
// kind of value the variant holds.
cast! {
    Replacement,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Func(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Func => Self::Func(v)
}
1105}