Skip to main content

fallow_extract/
mdx.rs

1//! MDX import/export statement extraction.
2//!
3//! Extracts `import` and `export` lines from MDX files (Markdown with JSX),
4//! handling multi-line imports via brace depth tracking.
5
6use std::path::Path;
7
8use oxc_allocator::Allocator;
9use oxc_ast_visit::Visit;
10use oxc_parser::Parser;
11use oxc_span::SourceType;
12
13use crate::ModuleInfo;
14use crate::visitor::ModuleInfoExtractor;
15use fallow_types::discover::FileId;
16
/// Extract import/export statements from MDX content.
///
/// MDX files are Markdown with JSX. Only `import` and `export` statements are
/// relevant for dead code analysis, so every other line is dropped. Selected
/// lines are returned verbatim (indentation preserved) and joined with `\n`;
/// the result is empty when nothing matches.
///
/// A statement starts on a line whose trimmed text begins with `import `,
/// `import{`, `export ` or `export{`. When that line leaves braces open and
/// has no ` from ` clause, the following lines are collected as well:
///
/// - `import ...`, `export { ... }` and `export * ...` statements end when the
///   braces balance, or on the first line that ends with `;` or carries a
///   `from` clause.
/// - `export` declarations (`export const`, `export function`, ...) end only
///   when the braces balance, because their bodies legitimately contain `;`
///   and the word "from".
///
/// Lines inside fenced code blocks (backtick or tilde fences of three or more
/// characters) are Markdown code samples rather than module syntax and are
/// skipped.
///
/// NOTE: CSS/SCSS `@apply` is handled in `parse_css_to_module()`, not here.
/// MDX import/export extraction only handles JS/TS `import`/`export` statements.
pub fn extract_mdx_statements(source: &str) -> String {
    let mut statements: Vec<&str> = Vec::new();
    let mut in_multiline = false;
    // Whether the statement being collected may also be closed by `;` / `from`.
    let mut ends_on_clause = false;
    let mut brace_depth: i32 = 0;
    // Fence character and run length of the code fence we are currently inside.
    let mut open_fence: Option<(char, usize)> = None;

    for line in source.lines() {
        let trimmed = line.trim();

        if in_multiline {
            statements.push(line);
            brace_depth += brace_delta(trimmed);
            let clause_end = ends_on_clause
                && (trimmed.ends_with(';')
                    || trimmed.contains(" from ")
                    || trimmed.contains(" from'")
                    || trimmed.contains(" from\""));
            if brace_depth <= 0 || clause_end {
                in_multiline = false;
                brace_depth = 0;
            }
            continue;
        }

        if let Some((marker, width)) = open_fence {
            if closes_fence(trimmed, marker, width) {
                open_fence = None;
            }
            continue;
        }
        if let Some(fence) = fence_opening(trimmed) {
            open_fence = Some(fence);
            continue;
        }

        if starts_statement(trimmed) {
            statements.push(line);
            brace_depth = brace_delta(trimmed);
            if brace_depth > 0 && !trimmed.contains(" from ") {
                in_multiline = true;
                ends_on_clause = !is_export_declaration(trimmed);
            }
        }
    }

    statements.join("\n")
}

/// Returns `true` when the trimmed line begins an `import` or `export` statement.
fn starts_statement(trimmed: &str) -> bool {
    ["import ", "import{", "export ", "export{"]
        .iter()
        .any(|prefix| trimmed.starts_with(prefix))
}

/// Returns `true` for `export` lines that declare something (`export const`,
/// `export function`, `export default`, ...) as opposed to `export { ... }`
/// or `export * ...`.
fn is_export_declaration(trimmed: &str) -> bool {
    trimmed
        .strip_prefix("export")
        .is_some_and(|rest| !matches!(rest.trim_start().chars().next(), Some('{' | '*')))
}

/// Net change in brace depth contributed by `text` (`{` adds one, `}` removes one).
fn brace_delta(text: &str) -> i32 {
    text.chars().fold(0, |depth, c| match c {
        '{' => depth + 1,
        '}' => depth - 1,
        _ => depth,
    })
}

/// If the trimmed line opens a fenced code block, returns the fence character
/// and the length of the fence run.
fn fence_opening(trimmed: &str) -> Option<(char, usize)> {
    let marker = trimmed.chars().next().filter(|&c| c == '`' || c == '~')?;
    let width = trimmed.chars().take_while(|&c| c == marker).count();
    if width < 3 {
        return None;
    }
    // The info string of a backtick fence cannot contain backticks; this keeps
    // a one-line inline code span from being mistaken for an opening fence.
    let info = trimmed.trim_start_matches(marker);
    (marker != '`' || !info.contains('`')).then_some((marker, width))
}

/// Returns `true` when the trimmed line closes a fence opened with `width`
/// repetitions of `marker`: the same character, at least as many times, and
/// nothing else on the line.
fn closes_fence(trimmed: &str, marker: char, width: usize) -> bool {
    let run = trimmed.chars().take_while(|&c| c == marker).count();
    run >= width && trimmed.trim_start_matches(marker).is_empty()
}
61
/// Returns `true` when `path` has the extension `mdx` (exact, case-sensitive match).
///
/// Paths without an extension, or whose extension is not valid UTF-8, yield `false`.
pub(crate) fn is_mdx_file(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("mdx"))
}
67
68/// Parse an MDX file by extracting import/export statements.
69pub(crate) fn parse_mdx_to_module(file_id: FileId, source: &str, content_hash: u64) -> ModuleInfo {
70    let suppressions = crate::suppress::parse_suppressions_from_source(source);
71    let line_offsets = fallow_types::extract::compute_line_offsets(source);
72    let statements = extract_mdx_statements(source);
73
74    if !statements.is_empty() {
75        let source_type = SourceType::jsx();
76        let allocator = Allocator::default();
77        let parser_return = Parser::new(&allocator, &statements, source_type).parse();
78        let mut extractor = ModuleInfoExtractor::new();
79        extractor.visit_program(&parser_return.program);
80        let mut info = extractor.into_module_info(file_id, content_hash, suppressions);
81        info.line_offsets = line_offsets;
82        return info;
83    }
84
85    let mut info = ModuleInfoExtractor::new().into_module_info(file_id, content_hash, suppressions);
86    info.line_offsets = line_offsets;
87    info
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every fragment occurs somewhere in `output`.
    fn assert_contains_all(output: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(output.contains(fragment));
        }
    }

    /// Asserts that none of the fragments occur in `output`.
    fn assert_contains_none(output: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(!output.contains(fragment));
        }
    }

    /// Shorthand for checking a bare file name against `is_mdx_file`.
    fn is_mdx(name: &str) -> bool {
        is_mdx_file(Path::new(name))
    }

    // ── is_mdx_file ──────────────────────────────────────────────

    #[test]
    fn is_mdx_file_positive() {
        assert!(is_mdx("post.mdx"));
    }

    #[test]
    fn is_mdx_file_rejects_md() {
        assert!(!is_mdx("readme.md"));
    }

    #[test]
    fn is_mdx_file_rejects_tsx() {
        assert!(!is_mdx("component.tsx"));
    }

    #[test]
    fn is_mdx_file_rejects_jsx() {
        assert!(!is_mdx("component.jsx"));
    }

    // ── extract_mdx_statements: import extraction ────────────────

    #[test]
    fn extracts_single_import() {
        let extracted = extract_mdx_statements("import { Chart } from './Chart'\n\n# Title\n");
        assert_contains_all(&extracted, &["import { Chart } from './Chart'"]);
    }

    #[test]
    fn extracts_default_import() {
        let extracted = extract_mdx_statements("import Button from './Button'\n\n# Title\n");
        assert_contains_all(&extracted, &["import Button from './Button'"]);
    }

    #[test]
    fn extracts_multiple_imports() {
        let extracted = extract_mdx_statements(
            "import { A } from './a'\nimport { B } from './b'\n\n# Title\n",
        );
        assert_contains_all(
            &extracted,
            &["import { A } from './a'", "import { B } from './b'"],
        );
    }

    #[test]
    fn extracts_import_no_space() {
        let extracted = extract_mdx_statements("import{ Chart } from './Chart'\n\n# Title\n");
        assert_contains_all(&extracted, &["import{ Chart }"]);
    }

    // ── Export extraction ────────────────────────────────────────

    #[test]
    fn extracts_export_const() {
        let extracted =
            extract_mdx_statements("export const meta = { title: 'Hello' }\n\n# Title\n");
        assert_contains_all(&extracted, &["export const meta"]);
    }

    #[test]
    fn extracts_export_no_space() {
        let extracted = extract_mdx_statements("export{ foo } from './foo'\n\n# Title\n");
        assert_contains_all(&extracted, &["export{ foo }"]);
    }

    // ── Multi-line imports ───────────────────────────────────────

    #[test]
    fn multiline_import_with_braces() {
        let extracted = extract_mdx_statements(
            "import {\n  Chart,\n  Table,\n  Graph\n} from './components'\n\n# Dashboard\n",
        );
        assert_contains_all(
            &extracted,
            &["Chart", "Table", "Graph", "from './components'"],
        );
    }

    #[test]
    fn multiline_import_closed_by_from() {
        let extracted =
            extract_mdx_statements("import {\n  Foo,\n  Bar\n} from './mod'\n\n# Content\n");
        assert_contains_all(&extracted, &["Foo", "Bar"]);
    }

    // ── Mixed content ────────────────────────────────────────────

    #[test]
    fn imports_between_prose() {
        let extracted = extract_mdx_statements(
            "import { Header } from './Header'\n\n# Section 1\n\nSome content.\n\nimport { Footer } from './Footer'\n\n## Section 2\n",
        );
        assert_contains_all(&extracted, &["Header", "Footer"]);
    }

    #[test]
    fn prose_lines_excluded() {
        let extracted = extract_mdx_statements(
            "import { A } from './a'\n\n# Title\n\nSome **markdown** text.\n\n- List item\n",
        );
        assert_contains_none(&extracted, &["Title", "markdown", "List item"]);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn empty_source() {
        assert!(extract_mdx_statements("").is_empty());
    }

    #[test]
    fn no_imports_or_exports() {
        assert!(extract_mdx_statements("# Just Markdown\n\nNo imports here.\n").is_empty());
    }

    #[test]
    fn import_like_text_not_extracted() {
        // "important" begins with "import" yet matches neither "import " nor "import{"
        assert!(extract_mdx_statements("This is an important note.\n").is_empty());
    }

    #[test]
    fn export_like_text_not_extracted() {
        // "exporting" matches neither "export " nor "export{"
        assert!(extract_mdx_statements("We are exporting goods overseas.\n").is_empty());
    }

    #[test]
    fn side_effect_import() {
        let extracted = extract_mdx_statements("import './global.css'\n\n# Title\n");
        assert_contains_all(&extracted, &["import './global.css'"]);
    }

    #[test]
    fn namespace_import() {
        let extracted = extract_mdx_statements("import * as utils from './utils'\n\n# Title\n");
        assert_contains_all(&extracted, &["import * as utils from './utils'"]);
    }

    #[test]
    fn single_line_import_with_braces_balanced() {
        // Balanced braces on a single line must not switch on multi-line mode
        let extracted = extract_mdx_statements("import { A } from './a'\n# Title\n");
        assert_eq!(extracted.lines().count(), 1);
    }

    // ── Multi-line import is extracted as one statement ──────────

    #[test]
    fn multiline_import_with_braces_extracted_as_one() {
        let extracted =
            extract_mdx_statements("import {\n  Foo,\n  Bar\n} from './module'\n\n# Title\n");
        assert!(extracted.contains("Foo"), "Foo should be in the result");
        assert!(extracted.contains("Bar"), "Bar should be in the result");
        assert!(
            extracted.contains("from './module'"),
            "from clause should be in the result"
        );
    }

    // ── Re-export with braces ───────────────────────────────────

    #[test]
    fn export_with_braces_from_module() {
        let extracted =
            extract_mdx_statements("export { Foo, Bar } from './module'\n\n# Title\n");
        assert_contains_all(&extracted, &["export { Foo, Bar } from './module'"]);
    }

    // ── Non-import/export lines between imports are ignored ─────

    #[test]
    fn non_import_lines_between_imports_ignored() {
        let extracted = extract_mdx_statements(
            "import { A } from './a'\n\n# Some heading\n\nA paragraph of text.\n\nimport { B } from './b'\n",
        );
        assert_contains_all(
            &extracted,
            &["import { A } from './a'", "import { B } from './b'"],
        );
        assert!(
            !extracted.contains("heading"),
            "prose should not be extracted"
        );
        assert!(
            !extracted.contains("paragraph"),
            "prose should not be extracted"
        );
        // Exactly the two import lines remain
        assert_eq!(extracted.lines().count(), 2);
    }
}