typst_library/foundations/str.rs
1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::{Span, Spanned};
10use unicode_normalization::UnicodeNormalization;
11use unicode_segmentation::UnicodeSegmentation;
12
13use crate::diag::{At, SourceResult, StrResult, bail};
14use crate::engine::Engine;
15use crate::foundations::{
16 Array, Bytes, Cast, Context, Decimal, Dict, Func, IntoValue, Label, Repr, Type,
17 Value, Version, cast, dict, func, repr, scope, ty,
18};
19use crate::layout::Alignment;
20
/// Build a [`Str`] from `format!`-style arguments.
///
/// The tokens are forwarded unchanged to `eco_format!` and the resulting
/// string is converted into a [`Str`].
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($args:tt)*) => {{
        $crate::foundations::Str::from(
            $crate::foundations::eco_format!($($args)*)
        )
    }};
}
29
30#[doc(hidden)]
31pub use ecow::eco_format;
32
33#[doc(inline)]
34pub use crate::__format_str as format_str;
35
36/// A sequence of Unicode codepoints.
37///
38/// You can iterate over the grapheme clusters of the string using a [for
39/// loop]($scripting/#loops). Grapheme clusters are basically characters but
40/// keep together things that belong together, e.g. multiple codepoints that
41/// together form a flag emoji. Strings can be added with the `+` operator,
42/// [joined together]($scripting/#blocks) and multiplied with integers.
43///
44/// Typst provides utility methods for string manipulation. Many of these
45/// methods (e.g., [`split`]($str.split), [`trim`]($str.trim) and
46/// [`replace`]($str.replace)) operate on _patterns:_ A pattern can be either a
47/// string or a [regular expression]($regex). This makes the methods quite
48/// versatile.
49///
50/// All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
51/// zero-based and negative indices wrap around to the end of the string.
52///
53/// You can convert a value to a string with this type's constructor.
54///
55/// # Example
56/// ```example
57/// #"hello world!" \
58/// #"\"hello\n world\"!" \
59/// #"1 2 3".split() \
60/// #"1,2;3".split(regex("[,;]")) \
61/// #(regex("\d+") in "ten euros") \
62/// #(regex("\d+") in "10 euros")
63/// ```
64///
65/// # Escape sequences { #escapes }
66/// Just like in markup, you can escape a few symbols in strings:
67/// - `[\\]` for a backslash
68/// - `[\"]` for a quote
69/// - `[\n]` for a newline
70/// - `[\r]` for a carriage return
71/// - `[\t]` for a tab
72/// - `[\u{1f600}]` for a hexadecimal Unicode escape sequence
73#[ty(scope, cast, title = "String")]
74#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
75#[derive(Serialize, Deserialize)]
76#[serde(transparent)]
77pub struct Str(EcoString);
78
79impl Str {
80 /// Create a new, empty string.
81 pub fn new() -> Self {
82 Self(EcoString::new())
83 }
84
85 /// Return `true` if the length is 0.
86 pub fn is_empty(&self) -> bool {
87 self.0.is_empty()
88 }
89
90 /// Repeat the string a number of times.
91 pub fn repeat(&self, n: usize) -> StrResult<Self> {
92 if self.0.len().checked_mul(n).is_none() {
93 return Err(eco_format!("cannot repeat this string {n} times"));
94 }
95 Ok(Self(self.0.repeat(n)))
96 }
97
98 /// A string slice containing the entire string.
99 pub fn as_str(&self) -> &str {
100 self
101 }
102
103 /// Resolve an index or throw an out of bounds error.
104 fn locate(&self, index: i64) -> StrResult<usize> {
105 self.locate_opt(index)?
106 .ok_or_else(|| out_of_bounds(index, self.len()))
107 }
108
109 /// Resolve an index, if it is within bounds and on a valid char boundary.
110 ///
111 /// `index == len` is considered in bounds.
112 fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
113 let wrapped =
114 if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
115
116 let resolved = wrapped
117 .and_then(|v| usize::try_from(v).ok())
118 .filter(|&v| v <= self.0.len());
119
120 if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
121 return Err(not_a_char_boundary(index));
122 }
123
124 Ok(resolved)
125 }
126}
127
128#[scope]
129impl Str {
130 /// Converts a value to a string.
131 ///
132 /// - Integers are formatted in base 10. This can be overridden with the
133 /// optional `base` parameter.
134 /// - Floats are formatted in base 10 and never in exponential notation.
135 /// - Negative integers and floats are formatted with the Unicode minus sign
136 /// ("−" U+2212) instead of the ASCII minus sign ("-" U+002D).
137 /// - From labels the name is extracted.
138 /// - Bytes are decoded as UTF-8.
139 ///
140 /// If you wish to convert from and to Unicode code points, see the
141 /// [`to-unicode`]($str.to-unicode) and [`from-unicode`]($str.from-unicode)
142 /// functions.
143 ///
144 /// ```example
145 /// #str(10) \
146 /// #str(4000, base: 16) \
147 /// #str(2.7) \
148 /// #str(1e8) \
149 /// #str(<intro>)
150 /// ```
151 #[func(constructor)]
152 pub fn construct(
153 /// The value that should be converted to a string.
154 value: ToStr,
155 /// The base (radix) to display integers in, between 2 and 36.
156 #[named]
157 #[default(Spanned::new(10, Span::detached()))]
158 base: Spanned<i64>,
159 ) -> SourceResult<Str> {
160 Ok(match value {
161 ToStr::Str(s) => {
162 if base.v != 10 {
163 bail!(base.span, "base is only supported for integers");
164 }
165 s
166 }
167 ToStr::Int(n) => {
168 if base.v < 2 || base.v > 36 {
169 bail!(base.span, "base must be between 2 and 36");
170 }
171 repr::format_int_with_base(n, base.v).into()
172 }
173 })
174 }
175
176 /// The length of the string in UTF-8 encoded bytes.
177 #[func(title = "Length")]
178 pub fn len(&self) -> usize {
179 self.0.len()
180 }
181
182 /// Extracts the first grapheme cluster of the string.
183 ///
184 /// Returns the provided default value if the string is empty or fails with
185 /// an error if no default value was specified.
186 #[func]
187 pub fn first(
188 &self,
189 /// A default value to return if the string is empty.
190 #[named]
191 default: Option<Str>,
192 ) -> StrResult<Str> {
193 self.0
194 .graphemes(true)
195 .next()
196 .map(Into::into)
197 .or(default)
198 .ok_or_else(string_is_empty)
199 }
200
201 /// Extracts the last grapheme cluster of the string.
202 ///
203 /// Returns the provided default value if the string is empty or fails with
204 /// an error if no default value was specified.
205 #[func]
206 pub fn last(
207 &self,
208 /// A default value to return if the string is empty.
209 #[named]
210 default: Option<Str>,
211 ) -> StrResult<Str> {
212 self.0
213 .graphemes(true)
214 .next_back()
215 .map(Into::into)
216 .or(default)
217 .ok_or_else(string_is_empty)
218 }
219
220 /// Extracts the first grapheme cluster after the specified index. Returns
221 /// the default value if the index is out of bounds or fails with an error
222 /// if no default value was specified.
223 #[func]
224 pub fn at(
225 &self,
226 /// The byte index. If negative, indexes from the back.
227 index: i64,
228 /// A default value to return if the index is out of bounds.
229 #[named]
230 default: Option<Value>,
231 ) -> StrResult<Value> {
232 let len = self.len();
233 self.locate_opt(index)?
234 .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
235 .or(default)
236 .ok_or_else(|| no_default_and_out_of_bounds(index, len))
237 }
238
239 /// Extracts a substring of the string.
240 /// Fails with an error if the start or end index is out of bounds.
241 #[func]
242 pub fn slice(
243 &self,
244 /// The start byte index (inclusive). If negative, indexes from the
245 /// back.
246 start: i64,
247 /// The end byte index (exclusive). If omitted, the whole slice until
248 /// the end of the string is extracted. If negative, indexes from the
249 /// back.
250 #[default]
251 end: Option<i64>,
252 /// The number of bytes to extract. This is equivalent to passing
253 /// `start + count` as the `end` position. Mutually exclusive with `end`.
254 #[named]
255 count: Option<i64>,
256 ) -> StrResult<Str> {
257 let start = self.locate(start)?;
258 let end = end.or(count.map(|c| start as i64 + c));
259 let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
260 Ok(self.0[start..end].into())
261 }
262
263 /// Returns the grapheme clusters of the string as an array of substrings.
264 #[func]
265 pub fn clusters(&self) -> Array {
266 self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
267 }
268
269 /// Returns the Unicode codepoints of the string as an array of substrings.
270 #[func]
271 pub fn codepoints(&self) -> Array {
272 self.chars().map(|c| Value::Str(c.into())).collect()
273 }
274
275 /// Converts a character into its corresponding code point.
276 ///
277 /// ```example
278 /// #"a".to-unicode() \
279 /// #("a\u{0300}"
280 /// .codepoints()
281 /// .map(str.to-unicode))
282 /// ```
283 #[func]
284 pub fn to_unicode(
285 /// The character that should be converted.
286 character: char,
287 ) -> u32 {
288 character as u32
289 }
290
291 /// Converts a unicode code point into its corresponding string.
292 ///
293 /// ```example
294 /// #str.from-unicode(97)
295 /// ```
296 #[func]
297 pub fn from_unicode(
298 /// The code point that should be converted.
299 value: u32,
300 ) -> StrResult<Str> {
301 let c: char = value
302 .try_into()
303 .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
304 Ok(c.into())
305 }
306
307 /// Normalizes the string to the given Unicode normal form.
308 ///
309 /// This is useful when manipulating strings containing Unicode combining
310 /// characters.
311 ///
312 /// ```typ
313 /// #assert.eq("é".normalize(form: "nfd"), "e\u{0301}")
314 /// #assert.eq("ſ́".normalize(form: "nfkc"), "ś")
315 /// ```
316 #[func]
317 pub fn normalize(
318 &self,
319 #[named]
320 #[default(UnicodeNormalForm::Nfc)]
321 form: UnicodeNormalForm,
322 ) -> Str {
323 match form {
324 UnicodeNormalForm::Nfc => self.nfc().collect(),
325 UnicodeNormalForm::Nfd => self.nfd().collect(),
326 UnicodeNormalForm::Nfkc => self.nfkc().collect(),
327 UnicodeNormalForm::Nfkd => self.nfkd().collect(),
328 }
329 }
330
331 /// Whether the string contains the specified pattern.
332 ///
333 /// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
334 /// instead of `{"abcd".contains("bc")}`.
335 #[func]
336 pub fn contains(
337 &self,
338 /// The pattern to search for.
339 pattern: StrPattern,
340 ) -> bool {
341 match pattern {
342 StrPattern::Str(pat) => self.0.contains(pat.as_str()),
343 StrPattern::Regex(re) => re.is_match(self),
344 }
345 }
346
347 /// Whether the string starts with the specified pattern.
348 #[func]
349 pub fn starts_with(
350 &self,
351 /// The pattern the string might start with.
352 pattern: StrPattern,
353 ) -> bool {
354 match pattern {
355 StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
356 StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
357 }
358 }
359
360 /// Whether the string ends with the specified pattern.
361 #[func]
362 pub fn ends_with(
363 &self,
364 /// The pattern the string might end with.
365 pattern: StrPattern,
366 ) -> bool {
367 match pattern {
368 StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
369 StrPattern::Regex(re) => {
370 let mut start_byte = 0;
371 while let Some(mat) = re.find_at(self, start_byte) {
372 if mat.end() == self.0.len() {
373 return true;
374 }
375
376 // There might still be a match overlapping this one, so
377 // restart at the next code point.
378 let Some(c) = self[mat.start()..].chars().next() else { break };
379 start_byte = mat.start() + c.len_utf8();
380 }
381 false
382 }
383 }
384 }
385
386 /// Searches for the specified pattern in the string and returns the first
387 /// match as a string or `{none}` if there is no match.
388 #[func]
389 pub fn find(
390 &self,
391 /// The pattern to search for.
392 pattern: StrPattern,
393 ) -> Option<Str> {
394 match pattern {
395 StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
396 StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
397 }
398 }
399
400 /// Searches for the specified pattern in the string and returns the index
401 /// of the first match as an integer or `{none}` if there is no match.
402 #[func]
403 pub fn position(
404 &self,
405 /// The pattern to search for.
406 pattern: StrPattern,
407 ) -> Option<usize> {
408 match pattern {
409 StrPattern::Str(pat) => self.0.find(pat.as_str()),
410 StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
411 }
412 }
413
414 /// Searches for the specified pattern in the string and returns a
415 /// dictionary with details about the first match or `{none}` if there is no
416 /// match.
417 ///
418 /// The returned dictionary has the following keys:
419 /// - `start`: The start offset of the match
420 /// - `end`: The end offset of the match
421 /// - `text`: The text that matched.
422 /// - `captures`: An array containing a string for each matched capturing
423 /// group. The first item of the array contains the first matched
424 /// capturing, not the whole match! This is empty unless the `pattern` was
425 /// a regex with capturing groups.
426 ///
427 /// ```example:"Shape of the returned dictionary"
428 /// #let pat = regex("not (a|an) (apple|cat)")
429 /// #"I'm a doctor, not an apple.".match(pat) \
430 /// #"I am not a cat!".match(pat)
431 /// ```
432 ///
433 /// ```example:"Different kinds of patterns"
434 /// #assert.eq("Is there a".match("for this?"), none)
435 /// #"The time of my life.".match(regex("[mit]+e"))
436 /// ```
437 #[func]
438 pub fn match_(
439 &self,
440 /// The pattern to search for.
441 pattern: StrPattern,
442 ) -> Option<Dict> {
443 match pattern {
444 StrPattern::Str(pat) => {
445 self.0.match_indices(pat.as_str()).next().map(match_to_dict)
446 }
447 StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
448 }
449 }
450
451 /// Searches for the specified pattern in the string and returns an array of
452 /// dictionaries with details about all matches. For details about the
453 /// returned dictionaries, see [above]($str.match).
454 ///
455 /// ```example
456 /// #"Day by Day.".matches("Day")
457 /// ```
458 #[func]
459 pub fn matches(
460 &self,
461 /// The pattern to search for.
462 pattern: StrPattern,
463 ) -> Array {
464 match pattern {
465 StrPattern::Str(pat) => self
466 .0
467 .match_indices(pat.as_str())
468 .map(match_to_dict)
469 .map(Value::Dict)
470 .collect(),
471 StrPattern::Regex(re) => re
472 .captures_iter(self)
473 .map(captures_to_dict)
474 .map(Value::Dict)
475 .collect(),
476 }
477 }
478
479 /// Replace at most `count` occurrences of the given pattern with a
480 /// replacement string or function (beginning from the start). If no count
481 /// is given, all occurrences are replaced.
482 #[func]
483 pub fn replace(
484 &self,
485 engine: &mut Engine,
486 context: Tracked<Context>,
487 /// The pattern to search for.
488 pattern: StrPattern,
489 /// The string to replace the matches with or a function that gets a
490 /// dictionary for each match and can return individual replacement
491 /// strings.
492 ///
493 /// The dictionary passed to the function has the same shape as the
494 /// dictionary returned by [`match`]($str.match).
495 replacement: Replacement,
496 /// If given, only the first `count` matches of the pattern are placed.
497 #[named]
498 count: Option<usize>,
499 ) -> SourceResult<Str> {
500 // Heuristic: Assume the new string is about the same length as
501 // the current string.
502 let mut output = EcoString::with_capacity(self.as_str().len());
503
504 // Replace one match of a pattern with the replacement.
505 let mut last_match = 0;
506 let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
507 // Push everything until the match.
508 output.push_str(&self[last_match..range.start]);
509 last_match = range.end;
510
511 // Determine and push the replacement.
512 match &replacement {
513 Replacement::Str(s) => output.push_str(s),
514 Replacement::Func(func) => {
515 let piece = func
516 .call(engine, context, [dict])?
517 .cast::<Str>()
518 .at(func.span())?;
519 output.push_str(&piece);
520 }
521 }
522
523 Ok(())
524 };
525
526 // Iterate over the matches of the `pattern`.
527 let count = count.unwrap_or(usize::MAX);
528 match &pattern {
529 StrPattern::Str(pat) => {
530 for m in self.match_indices(pat.as_str()).take(count) {
531 let (start, text) = m;
532 handle_match(start..start + text.len(), match_to_dict(m))?;
533 }
534 }
535 StrPattern::Regex(re) => {
536 for caps in re.captures_iter(self).take(count) {
537 // Extract the entire match over all capture groups.
538 let m = caps.get(0).unwrap();
539 handle_match(m.start()..m.end(), captures_to_dict(caps))?;
540 }
541 }
542 }
543
544 // Push the remainder.
545 output.push_str(&self[last_match..]);
546 Ok(output.into())
547 }
548
549 /// Removes matches of a pattern from one or both sides of the string, once or
550 /// repeatedly and returns the resulting string.
551 #[func]
552 pub fn trim(
553 &self,
554 /// The pattern to search for. If `{none}`, trims white spaces.
555 #[default]
556 pattern: Option<StrPattern>,
557 /// Can be `{start}` or `{end}` to only trim the start or end of the
558 /// string. If omitted, both sides are trimmed.
559 #[named]
560 at: Option<StrSide>,
561 /// Whether to repeatedly removes matches of the pattern or just once.
562 /// Defaults to `{true}`.
563 #[named]
564 #[default(true)]
565 repeat: bool,
566 ) -> Str {
567 let mut start = matches!(at, Some(StrSide::Start) | None);
568 let end = matches!(at, Some(StrSide::End) | None);
569
570 let trimmed = match pattern {
571 None => match at {
572 None => self.0.trim(),
573 Some(StrSide::Start) => self.0.trim_start(),
574 Some(StrSide::End) => self.0.trim_end(),
575 },
576 Some(StrPattern::Str(pat)) => {
577 let pat = pat.as_str();
578 let mut s = self.as_str();
579 if repeat {
580 if start {
581 s = s.trim_start_matches(pat);
582 }
583 if end {
584 s = s.trim_end_matches(pat);
585 }
586 } else {
587 if start {
588 s = s.strip_prefix(pat).unwrap_or(s);
589 }
590 if end {
591 s = s.strip_suffix(pat).unwrap_or(s);
592 }
593 }
594 s
595 }
596 Some(StrPattern::Regex(re)) => {
597 let s = self.as_str();
598 let mut last = None;
599 let mut range = 0..s.len();
600
601 for m in re.find_iter(s) {
602 // Does this match follow directly after the last one?
603 let consecutive = last == Some(m.start());
604
605 // As long as we're at the beginning or in a consecutive run
606 // of matches, and we're still trimming at the start, trim.
607 start &= m.start() == 0 || consecutive;
608 if start {
609 range.start = m.end();
610 start &= repeat;
611 }
612
613 // Reset end trim if we aren't consecutive anymore or aren't
614 // repeating.
615 if end && (!consecutive || !repeat) {
616 range.end = m.start();
617 }
618
619 last = Some(m.end());
620 }
621
622 // Is the last match directly at the end?
623 if last.is_some_and(|last| last < s.len()) {
624 range.end = s.len();
625 }
626
627 &s[range.start..range.start.max(range.end)]
628 }
629 };
630
631 trimmed.into()
632 }
633
634 /// Splits a string at matches of a specified pattern and returns an array
635 /// of the resulting parts.
636 ///
637 /// When the empty string is used as a separator, it separates every
638 /// character (i.e., Unicode code point) in the string, along with the
639 /// beginning and end of the string. In practice, this means that the
640 /// resulting list of parts will contain the empty string at the start
641 /// and end of the list.
642 #[func]
643 pub fn split(
644 &self,
645 /// The pattern to split at. Defaults to whitespace.
646 #[default]
647 pattern: Option<StrPattern>,
648 ) -> Array {
649 let s = self.as_str();
650 match pattern {
651 None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
652 Some(StrPattern::Str(pat)) => {
653 s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
654 }
655 Some(StrPattern::Regex(re)) => {
656 re.split(s).map(|v| Value::Str(v.into())).collect()
657 }
658 }
659 }
660
661 /// Reverse the string.
662 #[func(title = "Reverse")]
663 pub fn rev(&self) -> Str {
664 let mut s = EcoString::with_capacity(self.0.len());
665 for grapheme in self.as_str().graphemes(true).rev() {
666 s.push_str(grapheme);
667 }
668 s.into()
669 }
670}
671
672impl Deref for Str {
673 type Target = str;
674
675 fn deref(&self) -> &str {
676 &self.0
677 }
678}
679
680impl Debug for Str {
681 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
682 Debug::fmt(self.as_str(), f)
683 }
684}
685
686impl Display for Str {
687 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
688 Display::fmt(self.as_str(), f)
689 }
690}
691
692impl Repr for Str {
693 fn repr(&self) -> EcoString {
694 self.as_ref().repr()
695 }
696}
697
698impl Repr for EcoString {
699 fn repr(&self) -> EcoString {
700 self.as_ref().repr()
701 }
702}
703
704impl Repr for str {
705 fn repr(&self) -> EcoString {
706 let mut r = EcoString::with_capacity(self.len() + 2);
707 r.push('"');
708 for c in self.chars() {
709 match c {
710 '\0' => r.push_str(r"\u{0}"),
711 '\'' => r.push('\''),
712 '"' => r.push_str(r#"\""#),
713 _ => r.extend(c.escape_debug()),
714 }
715 }
716 r.push('"');
717 r
718 }
719}
720
721impl Repr for char {
722 fn repr(&self) -> EcoString {
723 EcoString::from(*self).repr()
724 }
725}
726
727impl Add for Str {
728 type Output = Self;
729
730 fn add(mut self, rhs: Self) -> Self::Output {
731 self += rhs;
732 self
733 }
734}
735
736impl AddAssign for Str {
737 fn add_assign(&mut self, rhs: Self) {
738 self.0.push_str(rhs.as_str());
739 }
740}
741
742impl AsRef<str> for Str {
743 fn as_ref(&self) -> &str {
744 self
745 }
746}
747
748impl Borrow<str> for Str {
749 fn borrow(&self) -> &str {
750 self
751 }
752}
753
754impl From<char> for Str {
755 fn from(c: char) -> Self {
756 Self(c.into())
757 }
758}
759
760impl From<&str> for Str {
761 fn from(s: &str) -> Self {
762 Self(s.into())
763 }
764}
765
766impl From<EcoString> for Str {
767 fn from(s: EcoString) -> Self {
768 Self(s)
769 }
770}
771
772impl From<String> for Str {
773 fn from(s: String) -> Self {
774 Self(s.into())
775 }
776}
777
778impl From<Cow<'_, str>> for Str {
779 fn from(s: Cow<str>) -> Self {
780 Self(s.into())
781 }
782}
783
784impl FromIterator<char> for Str {
785 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
786 Self(iter.into_iter().collect())
787 }
788}
789
790impl From<Str> for EcoString {
791 fn from(str: Str) -> Self {
792 str.0
793 }
794}
795
796impl From<Str> for String {
797 fn from(s: Str) -> Self {
798 s.0.into()
799 }
800}
801
802cast! {
803 char,
804 self => Value::Str(self.into()),
805 string: Str => {
806 let mut chars = string.chars();
807 match (chars.next(), chars.next()) {
808 (Some(c), None) => c,
809 _ => bail!("expected exactly one character"),
810 }
811 },
812}
813
// A borrowed string slice becomes a string value. Only the conversion into a
// value is defined here.
cast! {
    &str,
    self => Value::Str(self.into()),
}
818
// An `EcoString` becomes a string value and can be obtained from a `Str`.
cast! {
    EcoString,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
824
// A `String` becomes a string value and can be obtained from a `Str`.
cast! {
    String,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
830
/// A value that can be cast to a string.
///
/// This is the argument type of the string constructor. Integers are kept
/// separate because they are formatted later, once the base is known.
pub enum ToStr {
    /// A string value ready to be used as-is.
    Str(Str),
    /// An integer about to be formatted in a given base.
    Int(i64),
}
838
// Conversions accepted by the string constructor. Every input except an
// integer is turned into its final text right here.
cast! {
    ToStr,
    // Integers are formatted later, once the base is known.
    v: i64 => Self::Int(v),
    v: f64 => Self::Str(repr::display_float(v).into()),
    v: Decimal => Self::Str(format_str!("{}", v)),
    v: Version => Self::Str(format_str!("{}", v)),
    // Fails if the bytes cannot be interpreted as a string.
    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?),
    v: Label => Self::Str(v.resolve().as_str().into()),
    v: Type => Self::Str(v.long_name().into()),
    v: Str => Self::Str(v),
}
850
/// A Unicode normalization form.
///
/// Each variant is selected by the lowercase string given in its `#[string]`
/// attribute.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum UnicodeNormalForm {
    /// Canonical composition where e.g. accented letters are turned into a
    /// single Unicode codepoint.
    #[string("nfc")]
    Nfc,
    /// Canonical decomposition where e.g. accented letters are split into a
    /// separate base and diacritic.
    #[string("nfd")]
    Nfd,
    /// Like NFC, but using the Unicode compatibility decompositions.
    #[string("nfkc")]
    Nfkc,
    /// Like NFD, but using the Unicode compatibility decompositions.
    #[string("nfkd")]
    Nfkd,
}
869
870/// Convert an item of std's `match_indices` to a dictionary.
871fn match_to_dict((start, text): (usize, &str)) -> Dict {
872 dict! {
873 "start" => start,
874 "end" => start + text.len(),
875 "text" => text,
876 "captures" => Array::new(),
877 }
878}
879
880/// Convert regex captures to a dictionary.
881fn captures_to_dict(cap: regex::Captures) -> Dict {
882 let m = cap.get(0).expect("missing first match");
883 dict! {
884 "start" => m.start(),
885 "end" => m.end(),
886 "text" => m.as_str(),
887 "captures" => cap.iter()
888 .skip(1)
889 .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
890 .collect::<Array>(),
891 }
892}
893
894/// The out of bounds access error message.
895#[cold]
896fn out_of_bounds(index: i64, len: usize) -> EcoString {
897 eco_format!("string index out of bounds (index: {}, len: {})", index, len)
898}
899
900/// The out of bounds access error message when no default value was given.
901#[cold]
902fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
903 eco_format!(
904 "no default value was specified and string index out of bounds (index: {}, len: {})",
905 index,
906 len
907 )
908}
909
910/// The char boundary access error message.
911#[cold]
912fn not_a_char_boundary(index: i64) -> EcoString {
913 eco_format!("string index {} is not a character boundary", index)
914}
915
916/// The error message when the string is empty.
917#[cold]
918fn string_is_empty() -> EcoString {
919 "string is empty".into()
920}
921
/// A regular expression.
///
/// Can be used as a [show rule selector]($styling/#show-rules) and with
/// [string methods]($str) like `find`, `split`, and `replace`.
///
/// [See here](https://docs.rs/regex/latest/regex/#syntax) for a specification
/// of the supported syntax.
///
/// # Example
/// ```example
/// // Works with string methods.
/// #"a,b;c".split(regex("[,;]"))
///
/// // Works with show rules.
/// #show regex("\d+"): set text(red)
///
/// The numbers 1 to 10.
/// ```
// Thin wrapper around a compiled `regex::Regex`.
#[ty(scope)]
#[derive(Debug, Clone)]
pub struct Regex(regex::Regex);
943
944impl Regex {
945 /// Create a new regular expression.
946 pub fn new(re: &str) -> StrResult<Self> {
947 regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
948 }
949}
950
951#[scope]
952impl Regex {
953 /// Create a regular expression from a string.
954 #[func(constructor)]
955 pub fn construct(
956 /// The regular expression as a string.
957 ///
958 /// Most regex escape sequences just work because they are not valid Typst
959 /// escape sequences. To produce regex escape sequences that are also valid in
960 /// Typst (e.g. `[\\]`), you need to escape twice. Thus, to match a verbatim
961 /// backslash, you would need to write `{regex("\\\\")}`.
962 ///
963 /// If you need many escape sequences, you can also create a raw element
964 /// and extract its text to use it for your regular expressions:
965 /// ```{regex(`\d+\.\d+\.\d+`.text)}```.
966 regex: Spanned<Str>,
967 ) -> SourceResult<Regex> {
968 Self::new(®ex.v).at(regex.span)
969 }
970}
971
972impl Deref for Regex {
973 type Target = regex::Regex;
974
975 fn deref(&self) -> &Self::Target {
976 &self.0
977 }
978}
979
980impl Repr for Regex {
981 fn repr(&self) -> EcoString {
982 eco_format!("regex({})", self.0.as_str().repr())
983 }
984}
985
986impl PartialEq for Regex {
987 fn eq(&self, other: &Self) -> bool {
988 self.0.as_str() == other.0.as_str()
989 }
990}
991
992impl Hash for Regex {
993 fn hash<H: Hasher>(&self, state: &mut H) {
994 self.0.as_str().hash(state);
995 }
996}
997
/// A pattern which can be searched for in a string.
///
/// This is the argument type of the pattern-based string methods such as
/// `contains`, `find`, `replace`, `trim` and `split`.
#[derive(Debug, Clone)]
pub enum StrPattern {
    /// Just a string.
    Str(Str),
    /// A regular expression.
    Regex(Regex),
}
1006
// A pattern converts to and from either a string or a regex value.
cast! {
    StrPattern,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Regex(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Regex => Self::Regex(v),
}
1016
/// A side of a string.
///
/// Used by `trim` to restrict trimming to one end of the string.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum StrSide {
    /// The logical start of the string, may be left or right depending on the
    /// language.
    Start,
    /// The logical end of the string.
    End,
}
1026
// A side is given as an alignment; only the `start` and `end` alignments are
// accepted, every other alignment is rejected with an error.
cast! {
    StrSide,
    v: Alignment => match v {
        Alignment::START => Self::Start,
        Alignment::END => Self::End,
        _ => bail!("expected either `start` or `end`"),
    },
}
1035
/// A replacement for a matched [`Str`].
///
/// This is the argument type of the `replacement` parameter of `replace`.
pub enum Replacement {
    /// A string a match is replaced with.
    Str(Str),
    /// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
    /// whose output is inserted for the match.
    Func(Func),
}
1044
// A replacement converts to and from either a string or a function value.
cast! {
    Replacement,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Func(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Func => Self::Func(v)
}