venus_sync/
parser.rs

1//! Parser for Venus `.rs` notebooks.
2//!
3//! Extracts cells, doc comments, and metadata from `.rs` files.
4
5use std::fs;
6use std::path::Path;
7
8use crate::error::{SyncError, SyncResult};
9
/// Notebook-level information gathered from the module (`//!`) header.
///
/// `Default` yields no title, no description and an empty dependency list.
#[derive(Clone, Debug, Default)]
pub struct NotebookMetadata {
    /// Title of the notebook, taken from a leading `# Title` heading in the
    /// header doc comment; `None` when the header does not start with one.
    pub title: Option<String>,

    /// Free-form description: the header text that follows the title.
    pub description: Option<String>,

    /// Dependencies associated with the header's `//! ```cargo` block.
    pub dependencies: Vec<String>,
}
22
23/// A cell extracted from the notebook.
24#[derive(Debug, Clone)]
25pub struct NotebookCell {
26    /// Cell name (function name)
27    pub name: String,
28
29    /// Cell type
30    pub cell_type: CellType,
31
32    /// Markdown content (for markdown cells or doc comments)
33    pub markdown: Option<String>,
34
35    /// Rust source code (for code cells)
36    pub source: Option<String>,
37
38    /// Whether this cell has dependencies
39    pub has_dependencies: bool,
40}
41
/// The kind of content a notebook cell holds.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CellType {
    /// A documentation cell containing markdown text.
    Markdown,
    /// A cell containing executable Rust code.
    Code,
}
50
/// Parser that turns a Venus `.rs` notebook into metadata and cells.
pub struct RsParser {
    // No fields yet: kept as a braced struct so configuration can be added later.
}
55
56impl RsParser {
57    /// Create a new parser.
58    pub fn new() -> Self {
59        Self {}
60    }
61
62    /// Parse a `.rs` file into notebook metadata and cells.
63    pub fn parse_file(
64        &self,
65        path: impl AsRef<Path>,
66    ) -> SyncResult<(NotebookMetadata, Vec<NotebookCell>)> {
67        let path = path.as_ref();
68        let source = fs::read_to_string(path).map_err(|e| SyncError::ReadError {
69            path: path.to_path_buf(),
70            message: e.to_string(),
71        })?;
72
73        self.parse_source(&source)
74    }
75
76    /// Parse source code into notebook metadata and cells.
77    pub fn parse_source(&self, source: &str) -> SyncResult<(NotebookMetadata, Vec<NotebookCell>)> {
78        let mut metadata = NotebookMetadata::default();
79        let mut cells = Vec::new();
80
81        // First pass: extract module-level doc comments for metadata
82        let mut in_cargo_block = false;
83        let mut header_lines = Vec::new();
84
85        for line in source.lines() {
86            let trimmed = line.trim();
87
88            if trimmed.starts_with("//!") {
89                let content = trimmed.trim_start_matches("//!").trim();
90
91                if content == "```cargo" {
92                    in_cargo_block = true;
93                    continue;
94                }
95
96                if content == "```" && in_cargo_block {
97                    in_cargo_block = false;
98                    continue;
99                }
100
101                if in_cargo_block {
102                    // Skip cargo block content for markdown
103                    if content.starts_with('[') || content.contains('=') {
104                        continue;
105                    }
106                }
107
108                header_lines.push(content.to_string());
109            } else if !trimmed.is_empty() && !trimmed.starts_with("//") {
110                break;
111            }
112        }
113
114        // Parse header for title and description
115        if !header_lines.is_empty() {
116            let first = &header_lines[0];
117            if first.starts_with("# ") {
118                metadata.title = Some(first.trim_start_matches("# ").to_string());
119                if header_lines.len() > 1 {
120                    // Skip empty lines after title
121                    let desc_lines: Vec<&String> = header_lines[1..]
122                        .iter()
123                        .skip_while(|l| l.is_empty())
124                        .collect();
125                    if !desc_lines.is_empty() {
126                        metadata.description = Some(
127                            desc_lines
128                                .iter()
129                                .map(|s| s.as_str())
130                                .collect::<Vec<_>>()
131                                .join("\n"),
132                        );
133                    }
134                }
135            }
136        }
137
138        // Create a markdown cell for the header if there's content
139        let header_md = self.extract_header_markdown(source);
140        if let Some(md) = header_md {
141            cells.push(NotebookCell {
142                name: "_header".to_string(),
143                cell_type: CellType::Markdown,
144                markdown: Some(md),
145                source: None,
146                has_dependencies: false,
147            });
148        }
149
150        // Second pass: extract cells
151        self.extract_cells(source, &mut cells)?;
152
153        Ok((metadata, cells))
154    }
155
156    /// Extract the header markdown from module doc comments.
157    fn extract_header_markdown(&self, source: &str) -> Option<String> {
158        let mut lines = Vec::new();
159        let mut in_cargo_block = false;
160
161        for line in source.lines() {
162            let trimmed = line.trim();
163
164            if trimmed.starts_with("//!") {
165                let content = trimmed.trim_start_matches("//!").trim_start();
166
167                if content == "```cargo" {
168                    in_cargo_block = true;
169                    continue;
170                }
171
172                if content == "```" && in_cargo_block {
173                    in_cargo_block = false;
174                    continue;
175                }
176
177                if !in_cargo_block {
178                    lines.push(content.to_string());
179                }
180            } else if !trimmed.is_empty() && !trimmed.starts_with("//") {
181                break;
182            }
183        }
184
185        if lines.is_empty() || lines.iter().all(|l| l.is_empty()) {
186            None
187        } else {
188            // Trim trailing empty lines
189            while lines.last().map(|l| l.is_empty()).unwrap_or(false) {
190                lines.pop();
191            }
192            Some(lines.join("\n"))
193        }
194    }
195
196    /// Extract cells from source code.
197    fn extract_cells(&self, source: &str, cells: &mut Vec<NotebookCell>) -> SyncResult<()> {
198        let lines: Vec<&str> = source.lines().collect();
199        let mut i = 0;
200
201        while i < lines.len() {
202            let line = lines[i].trim();
203
204            // Look for #[venus::cell] attribute
205            if line == "#[venus::cell]" || line.starts_with("#[venus::cell(") {
206                // Collect doc comment before the attribute
207                let doc_start = self.find_doc_comment_start(&lines, i);
208                let doc_comment = if doc_start < i {
209                    Some(self.extract_doc_comment(&lines[doc_start..i]))
210                } else {
211                    None
212                };
213
214                // Find the function
215                i += 1;
216                while i < lines.len() && lines[i].trim().is_empty() {
217                    i += 1;
218                }
219
220                if i >= lines.len() {
221                    break;
222                }
223
224                // Extract function
225                let fn_line = lines[i].trim();
226                if !fn_line.starts_with("pub fn ") && !fn_line.starts_with("fn ") {
227                    i += 1;
228                    continue;
229                }
230
231                // Get function name and check for dependencies
232                let (name, has_deps) = self.parse_function_signature(fn_line);
233
234                // Find the end of the function (matching braces)
235                let fn_start = i;
236                let fn_end = self.find_function_end(&lines, i);
237
238                // Build the source (include attribute and function)
239                let attr_line = if doc_start < fn_start - 1 {
240                    lines[fn_start - 1].trim()
241                } else {
242                    "#[venus::cell]"
243                };
244
245                let mut source_lines = vec![attr_line.to_string()];
246                for line in lines.iter().take(fn_end + 1).skip(fn_start) {
247                    source_lines.push((*line).to_string());
248                }
249                let source_code = source_lines.join("\n");
250
251                // Add markdown cell for doc comment if present
252                if let Some(md) = &doc_comment
253                    && !md.is_empty()
254                {
255                    cells.push(NotebookCell {
256                        name: format!("{}_doc", name),
257                        cell_type: CellType::Markdown,
258                        markdown: Some(md.clone()),
259                        source: None,
260                        has_dependencies: false,
261                    });
262                }
263
264                // Add code cell
265                cells.push(NotebookCell {
266                    name: name.clone(),
267                    cell_type: CellType::Code,
268                    markdown: None,
269                    source: Some(source_code),
270                    has_dependencies: has_deps,
271                });
272
273                i = fn_end + 1;
274            } else {
275                i += 1;
276            }
277        }
278
279        Ok(())
280    }
281
282    /// Find the start of doc comments before a given line.
283    fn find_doc_comment_start(&self, lines: &[&str], attr_line: usize) -> usize {
284        if attr_line == 0 {
285            return attr_line;
286        }
287
288        let mut start = attr_line;
289        for i in (0..attr_line).rev() {
290            let trimmed = lines[i].trim();
291            if trimmed.starts_with("///") || trimmed.is_empty() {
292                start = i;
293            } else {
294                break;
295            }
296        }
297
298        // Skip leading empty lines
299        while start < attr_line && lines[start].trim().is_empty() {
300            start += 1;
301        }
302
303        start
304    }
305
306    /// Extract doc comment content from lines.
307    fn extract_doc_comment(&self, lines: &[&str]) -> String {
308        lines
309            .iter()
310            .filter(|l| l.trim().starts_with("///"))
311            .map(|l| {
312                let content = l.trim().trim_start_matches("///");
313                // Remove leading space if present
314                content.strip_prefix(' ').unwrap_or(content)
315            })
316            .collect::<Vec<_>>()
317            .join("\n")
318    }
319
320    /// Parse function signature to extract name and check for dependencies.
321    fn parse_function_signature(&self, line: &str) -> (String, bool) {
322        // Remove visibility and fn keyword
323        let stripped = line
324            .trim_start_matches("pub ")
325            .trim_start_matches("fn ")
326            .trim();
327
328        // Extract name (before the '(')
329        let name = stripped
330            .split('(')
331            .next()
332            .unwrap_or("unknown")
333            .trim()
334            .to_string();
335
336        // Check for dependencies (non-empty parameter list)
337        let has_deps = if let Some(params_start) = stripped.find('(') {
338            if let Some(params_end) = stripped.find(')') {
339                let params = &stripped[params_start + 1..params_end].trim();
340                !params.is_empty()
341                    && *params != "&mut self"
342                    && *params != "&self"
343                    && *params != "self"
344            } else {
345                false
346            }
347        } else {
348            false
349        };
350
351        (name, has_deps)
352    }
353
354    /// Find the end of a function (matching closing brace).
355    fn find_function_end(&self, lines: &[&str], start: usize) -> usize {
356        let mut brace_count = 0;
357        let mut found_open = false;
358
359        for (i, line) in lines.iter().enumerate().skip(start) {
360            for c in line.chars() {
361                if c == '{' {
362                    brace_count += 1;
363                    found_open = true;
364                } else if c == '}' {
365                    brace_count -= 1;
366                    if found_open && brace_count == 0 {
367                        return i;
368                    }
369                }
370            }
371        }
372
373        // If no end found, return the last line
374        lines.len().saturating_sub(1)
375    }
376}
377
378impl Default for RsParser {
379    fn default() -> Self {
380        Self::new()
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `source` with a fresh parser, panicking if parsing fails.
    fn parse(source: &str) -> (NotebookMetadata, Vec<NotebookCell>) {
        RsParser::new().parse_source(source).unwrap()
    }

    /// Return the first cell of the given kind, panicking if there is none.
    fn first_cell(cells: &[NotebookCell], kind: CellType) -> &NotebookCell {
        cells.iter().find(|cell| cell.cell_type == kind).unwrap()
    }

    #[test]
    fn test_parse_simple_notebook() {
        let (metadata, cells) = parse(
            r#"//! # My Notebook
//!
//! A test notebook.

use venus::prelude::*;

/// Returns a greeting.
#[venus::cell]
pub fn hello() -> String {
    "Hello, Venus!".to_string()
}
"#,
        );

        assert_eq!(metadata.title, Some("My Notebook".to_string()));
        assert!(metadata.description.is_some());

        // Header markdown, doc-comment markdown and the code cell itself.
        assert!(cells.len() >= 2);

        let hello = first_cell(&cells, CellType::Code);
        assert_eq!(hello.name, "hello");
        assert!(!hello.has_dependencies);
    }

    #[test]
    fn test_parse_cell_with_dependencies() {
        let (_, cells) = parse(
            r#"
#[venus::cell]
pub fn process(data: &String) -> i32 {
    data.len() as i32
}
"#,
        );

        let process = first_cell(&cells, CellType::Code);
        assert_eq!(process.name, "process");
        assert!(process.has_dependencies);
    }

    #[test]
    fn test_extract_doc_comment() {
        let (_, cells) = parse(
            r#"
/// This is a doc comment.
/// It has multiple lines.
#[venus::cell]
pub fn example() -> i32 { 42 }
"#,
        );

        // The doc comment is surfaced as a markdown cell.
        let doc = first_cell(&cells, CellType::Markdown);
        assert!(doc.markdown.as_ref().unwrap().contains("doc comment"));
    }

    #[test]
    fn test_function_signature_parsing() {
        let parser = RsParser::new();

        let (name, has_deps) = parser.parse_function_signature("pub fn hello() -> String {");
        assert_eq!(name, "hello");
        assert!(!has_deps);

        let (name, has_deps) =
            parser.parse_function_signature("fn process(data: &Config) -> Output {");
        assert_eq!(name, "process");
        assert!(has_deps);
    }
}