Skip to main content

embed_src/
embed.rs

1use regex::Regex;
2use std::path::Path;
3
4use crate::lang::ext_to_lang;
5
/// Build a backtick fence that cannot be confused with any backtick run inside `content`.
///
/// The fence is always at least three backticks long. When `content` already
/// contains a run of three or more consecutive backticks, the fence is one
/// backtick longer than the longest such run.
fn make_fence(content: &str) -> String {
    let mut longest = 0usize;
    let mut current = 0usize;

    // Single pass run-length scan; any non-backtick character ends the current run.
    for ch in content.chars() {
        if ch == '`' {
            current += 1;
            longest = longest.max(current);
        } else {
            current = 0;
        }
    }

    // Runs shorter than three never force the fence past the three-backtick minimum.
    let width = (longest + 1).max(3);
    std::iter::repeat('`').take(width).collect()
}
17
/// Parse a `lines` attribute value and extract the matching lines from `content`.
///
/// Supported formats (all 1-indexed, whitespace around each number is ignored):
///   - `"5"` — single line 5
///   - `"5-10"` — lines 5 through 10 (inclusive)
///   - `"5-"` — line 5 through end of file
///   - `"-10"` — line 1 through 10
///
/// Out-of-range and malformed values are tolerated rather than rejected:
///   - a start bound that is missing, unparsable or `0` is treated as line 1;
///   - an end bound that is missing or unparsable is treated as the last line;
///   - an end bound past the last line is clamped to the last line;
///   - an end bound before the start yields just the start line;
///   - a start bound past the last line yields an empty string.
///
/// The selected lines are joined with `\n` and carry no trailing newline.
fn extract_lines(content: &str, spec: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();

    // Trim before parsing so that `"5 - 10"` or `" 5 "` select the intended
    // lines instead of silently falling back to the defaults.
    let bound = |text: &str, fallback: usize| text.trim().parse::<usize>().unwrap_or(fallback);

    let (start, end) = match spec.split_once('-') {
        Some((left, right)) => (bound(left, 1), bound(right, total)),
        None => {
            // Single line number.
            let n = bound(spec, 1);
            (n, n)
        }
    };

    // Line numbers are 1-indexed, so 0 is promoted to the first line.
    let start = start.max(1);
    if start > total {
        return String::new();
    }
    // `start <= total` holds here, so the slice below is always in bounds.
    let end = end.max(start).min(total);

    lines[(start - 1)..end].join("\n")
}
57
/// Result of processing a single file.
///
/// Both versions of the text are kept so a caller can compare them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessResult {
    /// The file content as it was before processing.
    pub original: String,
    /// The content after embed directives have been processed.
    pub processed: String,
}
63
64/// Process a file: find all `embed-src src="..."` directives and replace the
65/// content between them and their closing `/embed-src` markers.
66pub fn process_file(path: &Path) -> Result<ProcessResult, String> {
67    let content = std::fs::read_to_string(path)
68        .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
69
70    let base_dir = path.parent().unwrap_or(Path::new("."));
71    let processed = process_content(&content, base_dir);
72
73    Ok(ProcessResult {
74        original: content,
75        processed,
76    })
77}
78
79/// Process content, resolving source paths relative to `base_dir`.
80///
81/// Markers are comment-agnostic: any line containing
82/// `embed-src src="path"` is an opening marker, and any line containing
83/// `/embed-src` is a closing marker. This allows embedding in any file type
84/// (markdown, Rust, Python, YAML, etc.).
85///
86/// By default, content is inserted raw. Use the `fence` attribute to wrap in
87/// markdown code fences: `fence` or `fence="auto"` auto-detects the language
88/// from the source extension; `fence="python"` uses an explicit language tag.
89pub fn process_content(content: &str, base_dir: &Path) -> String {
90    let open_re = Regex::new(r#"embed-src\s+src="([^"]+)""#).unwrap();
91    let lines_re = Regex::new(r#"lines="([^"]+)""#).unwrap();
92    let fence_re = Regex::new(r#"\bfence(?:="([^"]*)")?"#).unwrap();
93    // Match /embed-src preceded by a non-word character (space, comment chars, etc.)
94    // but not as part of a URL like "urmzd/embed-src".
95    let close_re = Regex::new(r#"(?:^|[^a-zA-Z0-9_])/embed-src\b"#).unwrap();
96
97    let lines: Vec<&str> = content.lines().collect();
98    let mut result = Vec::new();
99    let mut i = 0;
100    let has_trailing_newline = content.ends_with('\n');
101    let mut in_fence = false;
102    let mut fence_len: usize = 0;
103
104    while i < lines.len() {
105        let line = lines[i];
106
107        // Track backtick-fenced code blocks so directives inside them are skipped.
108        let trimmed = line.trim_start();
109        if trimmed.starts_with("```") {
110            let backtick_count = trimmed.bytes().take_while(|&b| b == b'`').count();
111            if !in_fence {
112                in_fence = true;
113                fence_len = backtick_count;
114                result.push(line.to_string());
115                i += 1;
116                continue;
117            } else if backtick_count >= fence_len {
118                in_fence = false;
119                fence_len = 0;
120                result.push(line.to_string());
121                i += 1;
122                continue;
123            }
124        }
125
126        if in_fence {
127            result.push(line.to_string());
128            i += 1;
129            continue;
130        }
131
132        if let Some(cap) = open_re.captures(line) {
133            let src_path = cap[1].to_string();
134            let lines_attr = lines_re.captures(line).map(|c| c[1].to_string());
135            let fence_cap = fence_re.captures(line);
136            let has_fence = fence_cap.is_some();
137            let fence_attr = fence_cap.and_then(|c| c.get(1).map(|m| m.as_str().to_string()));
138
139            // Emit the opening marker line.
140            result.push(line.to_string());
141
142            // Skip lines until we find the closing marker or run out of lines.
143            let mut found_close = false;
144            let mut close_line_idx = i + 1;
145            while close_line_idx < lines.len() {
146                if close_re.is_match(lines[close_line_idx]) {
147                    found_close = true;
148                    break;
149                }
150                close_line_idx += 1;
151            }
152
153            if !found_close {
154                // No closing marker: emit remaining lines unchanged.
155                eprintln!(
156                    "Warning: no closing /embed-src found for directive at line {}",
157                    i + 1
158                );
159                i += 1;
160                continue;
161            }
162
163            // Read source file.
164            let file_path = base_dir.join(&src_path);
165            let file_content = match std::fs::read_to_string(&file_path) {
166                Ok(c) => c,
167                Err(e) => {
168                    eprintln!("Warning: could not read {}: {}", file_path.display(), e);
169                    // Emit original lines unchanged.
170                    for line in &lines[(i + 1)..=close_line_idx] {
171                        result.push(line.to_string());
172                    }
173                    i = close_line_idx + 1;
174                    continue;
175                }
176            };
177
178            // Apply line-range filter if specified.
179            let file_content = match &lines_attr {
180                Some(spec) => extract_lines(&file_content, spec),
181                None => file_content,
182            };
183
184            // Insert content: raw or fenced.
185            if has_fence {
186                let lang = match &fence_attr {
187                    Some(lang) if !lang.is_empty() && lang != "auto" => lang.to_string(),
188                    _ => {
189                        // auto-detect from extension
190                        let ext = Path::new(&src_path)
191                            .extension()
192                            .and_then(|e| e.to_str())
193                            .unwrap_or("");
194                        ext_to_lang(ext).to_string()
195                    }
196                };
197                let fence = make_fence(&file_content);
198                result.push(format!("{}{}", fence, lang));
199                result.push(file_content.trim_end().to_string());
200                result.push(fence);
201            } else {
202                // Raw insertion.
203                let trimmed = file_content.trim_end();
204                if !trimmed.is_empty() {
205                    result.push(trimmed.to_string());
206                }
207            }
208
209            // Emit the closing marker line.
210            result.push(lines[close_line_idx].to_string());
211            i = close_line_idx + 1;
212        } else {
213            result.push(line.to_string());
214            i += 1;
215        }
216    }
217
218    let mut output = result.join("\n");
219    if has_trailing_newline {
220        output.push('\n');
221    }
222    output
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Text without any directive must come back byte-for-byte.
    #[test]
    fn no_directives() {
        let text = "# Hello\n\nSome text.\n";
        assert_eq!(process_content(text, Path::new(".")), text);
    }

    /// An opening marker with no closing marker leaves the content unchanged.
    #[test]
    fn missing_close_tag() {
        let text = "<!-- embed-src src=\"foo.rs\" -->\nstale content\n";
        let cwd = Path::new(".");
        assert_eq!(process_content(text, cwd), text);
    }

    /// A bare number selects exactly that line.
    #[test]
    fn extract_lines_single() {
        assert_eq!(extract_lines("line1\nline2\nline3\n", "2"), "line2");
    }

    /// `a-b` selects an inclusive range.
    #[test]
    fn extract_lines_range() {
        assert_eq!(extract_lines("a\nb\nc\nd\ne\n", "2-4"), "b\nc\nd");
    }

    /// `a-` runs through the end of the content.
    #[test]
    fn extract_lines_open_end() {
        assert_eq!(extract_lines("a\nb\nc\nd\n", "3-"), "c\nd");
    }

    /// `-b` starts at the first line.
    #[test]
    fn extract_lines_open_start() {
        assert_eq!(extract_lines("a\nb\nc\nd\n", "-2"), "a\nb");
    }

    /// Bounds past the end are clamped, or yield nothing when the start is past the end.
    #[test]
    fn extract_lines_out_of_bounds() {
        let three_lines = "a\nb\nc\n";
        // End beyond file length: clamp to last line.
        assert_eq!(extract_lines(three_lines, "2-100"), "b\nc");
        // Start beyond file length: empty.
        assert_eq!(extract_lines(three_lines, "100"), "");
    }
}