1use std::ops::Range;
2
/// A stretch of text that has been assigned a language by a marker comment.
///
/// Values are produced by [`ScopeParser::parse`] and consumed by
/// [`ScopeParser::language_at`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScopedRegion {
    /// Language tag exactly as written in the marker (for example `fr`).
    pub language: String,
    /// Half-open byte range, within the text that was parsed, that the
    /// language applies to.
    pub byte_range: Range<usize>,
}
13
/// Stateless entry point for locating `lang:` marker comments in a text.
///
/// The type holds no data; all functionality is exposed as associated
/// functions.
pub struct ScopeParser;
22
23impl ScopeParser {
24 #[must_use]
31 pub fn parse(text: &str) -> Vec<ScopedRegion> {
32 let mut markers: Vec<(usize, String)> = Vec::new();
33
34 for (line_start, line) in line_byte_offsets(text) {
35 if let Some(lang) = Self::extract_marker(line) {
36 let scope_start = line_start + line.len();
38 let scope_start = if text.as_bytes().get(scope_start) == Some(&b'\n') {
40 scope_start + 1
41 } else {
42 scope_start
43 };
44 markers.push((scope_start, lang));
45 }
46 }
47
48 let mut regions = Vec::with_capacity(markers.len());
49 for (i, (start, lang)) in markers.iter().enumerate() {
50 let end = markers.get(i + 1).map_or(text.len(), |(next_start, _)| {
51 text[..*next_start]
53 .rfind('\n')
54 .map_or(*next_start, |nl_pos| {
55 text[..nl_pos].rfind('\n').map_or(0, |prev_nl| prev_nl + 1)
57 })
58 });
59
60 if end > *start {
61 regions.push(ScopedRegion {
62 language: lang.clone(),
63 byte_range: *start..end,
64 });
65 }
66 }
67
68 regions
69 }
70
71 #[must_use]
73 pub fn language_at(regions: &[ScopedRegion], byte_offset: usize) -> Option<&str> {
74 regions
75 .iter()
76 .find(|r| r.byte_range.contains(&byte_offset))
77 .map(|r| r.language.as_str())
78 }
79
80 fn extract_marker(line: &str) -> Option<String> {
81 let trimmed = line.trim();
82
83 if let Some(rest) = trimmed.strip_prefix("<!--")
85 && let Some(inner) = rest.strip_suffix("-->")
86 {
87 return Self::parse_lang_directive(inner.trim());
88 }
89
90 if let Some(rest) = trimmed.strip_prefix("//") {
92 return Self::parse_lang_directive(rest.trim());
93 }
94
95 if let Some(rest) = trimmed.strip_prefix("/*")
97 && let Some(inner) = rest.strip_suffix("*/")
98 {
99 return Self::parse_lang_directive(inner.trim());
100 }
101
102 if let Some(rest) = trimmed.strip_prefix('%') {
104 return Self::parse_lang_directive(rest.trim());
105 }
106
107 None
108 }
109
110 fn parse_lang_directive(s: &str) -> Option<String> {
111 let s = s.strip_prefix('@').unwrap_or(s);
113 let s = s.strip_prefix("lang").unwrap_or_default();
114 let s = s.strip_prefix(':').unwrap_or_default();
115 let lang = s.trim();
116
117 if lang.is_empty() || lang.len() > 10 || lang.contains(' ') {
118 return None;
119 }
120
121 Some(lang.to_string())
122 }
123}
124
/// Walks `text` line by line, pairing every line with the byte offset at
/// which it begins.
///
/// Lines are yielded with their terminating `'\n'` still attached (the final
/// line may lack one), so the offsets of consecutive items are contiguous.
fn line_byte_offsets(text: &str) -> impl Iterator<Item = (usize, &str)> {
    // The scan state is the offset at which the upcoming line starts.
    text.split_inclusive('\n').scan(0usize, |next_start, line| {
        let begins_at = *next_start;
        *next_start = begins_at + line.len();
        Some((begins_at, line))
    })
}
134
/// Unit tests for marker recognition, region extraction and offset lookup.
#[cfg(test)]
mod tests {
    use super::*;

    /// `<!-- lang: xx -->` opens a region covering the text below it.
    #[test]
    fn html_comment_marker() {
        let text = "English text.\n<!-- lang: fr -->\nTexte français.\n";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].language, "fr");
        let scoped_text = &text[regions[0].byte_range.clone()];
        assert!(scoped_text.contains("Texte français"));
    }

    /// `// @lang: xx` is recognised.
    #[test]
    fn line_comment_marker() {
        let text = "English.\n// @lang: de\nDeutscher Text.\n";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].language, "de");
    }

    /// `/* @lang: xx */` is recognised.
    #[test]
    fn block_comment_marker() {
        let text = "Hello.\n/* @lang: es */\nTexto español.\n";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].language, "es");
    }

    /// `% @lang: xx` is recognised.
    #[test]
    fn latex_comment_marker() {
        let text = "English.\n% @lang: fr\nFrançais.\n";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].language, "fr");
    }

    /// Consecutive markers yield one region each, in document order.
    #[test]
    fn multiple_regions() {
        let text = "\
English paragraph.
<!-- lang: fr -->
Paragraphe français.
<!-- lang: de -->
Deutscher Absatz.
";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 2);
        assert_eq!(regions[0].language, "fr");
        assert_eq!(regions[1].language, "de");
    }

    /// Text without any marker produces no regions.
    #[test]
    fn no_markers() {
        let text = "Just plain English text with no annotations.";
        let regions = ScopeParser::parse(text);
        assert!(regions.is_empty());
    }

    /// Offsets inside a region resolve to its language; offsets before the
    /// first marker resolve to `None`.
    #[test]
    fn language_at_lookup() {
        let text = "Hello.\n<!-- lang: fr -->\nBonjour.\n";
        let regions = ScopeParser::parse(text);
        let bonjour_offset = text.find("Bonjour").unwrap();
        assert_eq!(
            ScopeParser::language_at(&regions, bonjour_offset),
            Some("fr")
        );
        assert_eq!(ScopeParser::language_at(&regions, 0), None);
    }

    /// The `@` in front of `lang:` is optional.
    #[test]
    fn marker_without_at_sign() {
        let text = "Hello.\n<!-- lang: ja -->\n日本語テキスト.\n";
        let regions = ScopeParser::parse(text);
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].language, "ja");
    }

    /// Empty tags, over-long tags and non-`lang` directives are not markers.
    #[test]
    fn ignores_invalid_markers() {
        let text = "<!-- lang: -->\n<!-- lang: this is not a lang -->\n<!-- notlang: fr -->\n";
        let regions = ScopeParser::parse(text);
        assert!(regions.is_empty());
    }
}