Skip to main content

double_o/
learn_utils.rs

1//! Utility functions for the learn module.
2
/// Return the longest prefix of `s` that is at most `max_bytes` bytes long and
/// ends on a UTF-8 character boundary.
///
/// When `s` already fits within `max_bytes` it is returned unchanged. Otherwise
/// the cut point is moved backwards from `max_bytes` until it no longer falls
/// inside a multi-byte character, so the result may be shorter than `max_bytes`
/// (and is empty when not even the first character fits).
pub(crate) fn truncate_utf8(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        s
    } else {
        // Index 0 is always a character boundary, so the search cannot fail;
        // the fallback only exists to avoid an `unwrap`.
        let cut = (0..=max_bytes)
            .rev()
            .find(|&idx| s.is_char_boundary(idx))
            .unwrap_or(0);
        &s[..cut]
    }
}
14
/// Strip markdown code fences (``` and ~~~) from a string.
///
/// The input is trimmed first. If it then starts with a fence marker, the whole
/// opening line (the marker plus any language identifier such as `toml`) is
/// dropped. The text is then cut at the last line that begins with a fence
/// marker; that line and everything after it are discarded. The marker matching
/// the opening fence is tried first, so a `~~~` block may contain lines that
/// start with backticks; the other marker is only used as a fallback.
///
/// Inputs without an opening fence are still cut at a trailing fence line if
/// one exists. A single-line input that merely starts with a fence marker has
/// no fenced body and is returned trimmed but otherwise unchanged.
///
/// Returns the extracted content with surrounding whitespace removed.
pub(crate) fn strip_fences(s: &str) -> String {
    const BACKTICKS: &str = "```";
    const TILDES: &str = "~~~";

    let trimmed = s.trim();

    // Which fence (if any) opens the text, and what follows the marker.
    let opening = [BACKTICKS, TILDES]
        .iter()
        .copied()
        .find_map(|fence| trimmed.strip_prefix(fence).map(|rest| (fence, rest)));

    let (body, opened_with) = match opening {
        Some((fence, rest)) => match rest.find('\n') {
            // Skip the entire opening line by byte position of its newline rather
            // than by the length of the trimmed language tag: the latter is off
            // whenever whitespace surrounds the tag and can even split a
            // multi-byte character. The newline itself is kept so that a closing
            // fence directly after the opening line still matches "\n<fence>".
            Some(newline) => (&rest[newline..], fence),
            // Only one line: there is no fenced body to extract.
            None => return trimmed.to_string(),
        },
        None => (trimmed, BACKTICKS),
    };

    // Prefer the closing marker that matches the opening fence; fall back to the
    // other one so mismatched fences are still handled leniently.
    let closers = if opened_with == TILDES {
        ["\n~~~", "\n```"]
    } else {
        ["\n```", "\n~~~"]
    };

    closers
        .iter()
        .find_map(|closer| body.rsplit_once(*closer).map(|(before, _)| before))
        .unwrap_or(body)
        .trim()
        .to_string()
}
47
#[cfg(test)]
mod tests {
    use super::*;

    /// A limit larger than the input leaves the string untouched.
    #[test]
    fn test_truncate_utf8_short_string() {
        assert_eq!(truncate_utf8("hello", 10), "hello");
    }

    /// A limit equal to the byte length leaves the string untouched.
    #[test]
    fn test_truncate_utf8_exact_length() {
        assert_eq!(truncate_utf8("hello", 5), "hello");
    }

    /// Each of the four characters occupies 3 bytes, so 6 bytes keep exactly two.
    #[test]
    fn test_truncate_utf8_multibyte() {
        let truncated = truncate_utf8("你好世界", 6);
        assert_eq!(truncated, "你好");
    }

    /// A 7-byte limit falls inside the first 3-byte character after "hello"
    /// (5 bytes), so the cut must back up to the preceding boundary.
    #[test]
    fn test_truncate_utf8_char_boundary() {
        assert_eq!(truncate_utf8("hello世界", 7), "hello");
    }

    /// Backtick fence with a language identifier.
    #[test]
    fn test_strip_fences_code_blocks() {
        let fenced = "```toml\ncommand_match = \"test\"\n```";
        let expected = "command_match = \"test\"";
        assert_eq!(strip_fences(fenced), expected);
    }

    /// Backtick fence without a language identifier.
    #[test]
    fn test_strip_fences_without_language() {
        let fenced = "```\ncommand_match = \"test\"\n```";
        let expected = "command_match = \"test\"";
        assert_eq!(strip_fences(fenced), expected);
    }

    /// Tilde fences are handled like backtick fences.
    #[test]
    fn test_strip_fences_tilde_fences() {
        let fenced = "~~~\ncommand_match = \"test\"\n~~~";
        let expected = "command_match = \"test\"";
        assert_eq!(strip_fences(fenced), expected);
    }

    /// Text without any fence comes back unchanged.
    #[test]
    fn test_strip_fences_no_fence() {
        let plain = "command_match = \"test\"";
        assert_eq!(strip_fences(plain), plain);
    }
}