venus_core/graph/
parser.rs

1//! Cell parser using syn to extract cell information from Rust source files.
2
3use std::path::Path;
4use syn::spanned::Spanned;
5use syn::visit::Visit;
6use syn::{Attribute, File, FnArg, ItemFn, Pat, ReturnType, Type};
7
8use super::types::{CellId, CellInfo, Dependency, DefinitionCell, MarkdownCell, SourceSpan};
9use crate::error::{Error, Result};
10
/// Result of parsing a notebook file.
///
/// Produced by `CellParser::parse_file` and `CellParser::parse_str`. The three
/// vectors are moved out of the parser when a parse completes.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Code cells (executable functions with `#[venus::cell]` or `#[cell]`).
    pub code_cells: Vec<CellInfo>,
    /// Markdown cells: module-level `//!` docs and standalone top-level `//` comment blocks.
    pub markdown_cells: Vec<MarkdownCell>,
    /// Definition cells (imports, types, impl blocks, helper functions).
    pub definition_cells: Vec<DefinitionCell>,
}
21
/// Parser for extracting cells from Rust source files.
///
/// The parser is reusable: every call to `parse_str` resets the fields below
/// before parsing, and hands the collected cells back in a `ParseResult`.
pub struct CellParser {
    /// Code cells collected while visiting the syntax tree.
    cells: Vec<CellInfo>,
    /// Markdown cells collected from `//!` docs and standalone `//` comments.
    markdown_cells: Vec<MarkdownCell>,
    /// Definition cells collected from non-cell top-level items.
    definition_cells: Vec<DefinitionCell>,
    /// Path of the file being parsed; copied into every emitted cell.
    source_file: std::path::PathBuf,
    /// Full text of the file being parsed (used to slice out original source by line).
    source_code: String,
}
35
36impl CellParser {
37    /// Create a new parser.
38    pub fn new() -> Self {
39        Self {
40            cells: Vec::new(),
41            markdown_cells: Vec::new(),
42            definition_cells: Vec::new(),
43            source_file: std::path::PathBuf::new(),
44            source_code: String::new(),
45        }
46    }
47
48    /// Parse a source file and extract all cells.
49    pub fn parse_file(&mut self, path: &Path) -> Result<ParseResult> {
50        let source = std::fs::read_to_string(path)
51            .map_err(|e| Error::Parse(format!("Failed to read file {}: {}", path.display(), e)))?;
52
53        self.parse_str(&source, path)
54    }
55
56    /// Parse source code string and extract all cells.
57    pub fn parse_str(&mut self, source: &str, path: &Path) -> Result<ParseResult> {
58        self.source_file = path.to_path_buf();
59        self.source_code = source.to_string();
60        self.cells.clear();
61        self.markdown_cells.clear();
62        self.definition_cells.clear();
63
64        let file: File = syn::parse_str(source)
65            .map_err(|e| Error::Parse(format!("Failed to parse {}: {}", path.display(), e)))?;
66
67        // Extract module-level doc comments (//!)
68        self.extract_module_docs(&file);
69
70        // Visit all items in the file to find code cells
71        self.visit_file(&file);
72
73        // Extract standalone // comment blocks (not attached to cells)
74        self.extract_standalone_doc_comments(source);
75
76        // Extract definition blocks (imports, types, helpers)
77        self.extract_definition_blocks(&file);
78
79        Ok(ParseResult {
80            code_cells: std::mem::take(&mut self.cells),
81            markdown_cells: std::mem::take(&mut self.markdown_cells),
82            definition_cells: std::mem::take(&mut self.definition_cells),
83        })
84    }
85
86    /// Check if a function has the #[venus::cell] attribute.
87    fn has_cell_attribute(attrs: &[Attribute]) -> bool {
88        attrs.iter().any(|attr| {
89            let path = attr.path();
90            let segments: Vec<_> = path.segments.iter().map(|s| s.ident.to_string()).collect();
91
92            // Match #[venus::cell] or #[cell] (if imported)
93            (segments.len() == 2 && segments[0] == "venus" && segments[1] == "cell")
94                || (segments.len() == 1 && segments[0] == "cell")
95        })
96    }
97
98    /// Extract doc comments from attributes.
99    fn extract_doc_comment(attrs: &[Attribute]) -> Option<String> {
100        let docs: Vec<String> = attrs
101            .iter()
102            .filter_map(|attr| {
103                if attr.path().is_ident("doc")
104                    && let syn::Meta::NameValue(nv) = &attr.meta
105                    && let syn::Expr::Lit(syn::ExprLit {
106                        lit: syn::Lit::Str(s),
107                        ..
108                    }) = &nv.value
109                {
110                    return Some(s.value());
111                }
112                None
113            })
114            .collect();
115
116        if docs.is_empty() {
117            None
118        } else {
119            // Join doc lines and trim leading space (Rust adds a space after ///)
120            Some(
121                docs.iter()
122                    .map(|s| s.strip_prefix(' ').unwrap_or(s))
123                    .collect::<Vec<_>>()
124                    .join("\n"),
125            )
126        }
127    }
128
129    /// Extract display name from doc comment.
130    ///
131    /// Looks for the first markdown heading (# Display Name) in the doc comment.
132    /// If not found, returns the function name as fallback.
133    fn extract_display_name(doc_comment: &Option<String>, function_name: &str) -> String {
134        if let Some(doc) = doc_comment {
135            for line in doc.lines() {
136                let trimmed = line.trim();
137                if let Some(heading) = trimmed.strip_prefix('#') {
138                    let display_name = heading.trim();
139                    if !display_name.is_empty() {
140                        return display_name.to_string();
141                    }
142                }
143            }
144        }
145        // Fallback to function name
146        function_name.to_string()
147    }
148
149    /// Extract the type as a string.
150    fn type_to_string(ty: &Type) -> String {
151        quote::quote!(#ty).to_string()
152    }
153
154    /// Extract dependency information from a function parameter.
155    fn extract_dependency(arg: &FnArg) -> Option<Dependency> {
156        match arg {
157            FnArg::Typed(pat_type) => {
158                // Extract parameter name
159                let param_name = match &*pat_type.pat {
160                    Pat::Ident(ident) => ident.ident.to_string(),
161                    _ => return None, // Skip complex patterns
162                };
163
164                // Skip special parameters like &mut CellContext
165                if param_name == "ctx" || param_name == "_ctx" {
166                    return None;
167                }
168
169                // Extract type information
170                let (base_type, is_ref, is_mut) = match &*pat_type.ty {
171                    Type::Reference(ref_type) => {
172                        let is_mut = ref_type.mutability.is_some();
173                        let inner_type = Self::type_to_string(&ref_type.elem);
174                        (inner_type, true, is_mut)
175                    }
176                    other => (Self::type_to_string(other), false, false),
177                };
178
179                Some(Dependency {
180                    param_name,
181                    param_type: base_type,
182                    is_ref,
183                    is_mut,
184                })
185            }
186            FnArg::Receiver(_) => None, // Skip self parameters
187        }
188    }
189
190    /// Extract return type as a string.
191    fn extract_return_type(ret: &ReturnType) -> String {
192        match ret {
193            ReturnType::Default => "()".to_string(),
194            ReturnType::Type(_, ty) => Self::type_to_string(ty),
195        }
196    }
197
198    /// Calculate source span from syn span.
199    ///
200    /// Note: proc_macro2 span locations are only available with the span-locations feature.
201    fn span_to_source_span(&self, span: proc_macro2::Span) -> SourceSpan {
202        let start = span.start();
203        let end = span.end();
204
205        SourceSpan {
206            start_line: start.line,
207            start_col: start.column,
208            end_line: end.line,
209            end_col: end.column,
210        }
211    }
212
213    /// Extract the source code for a function.
214    /// Includes the full function signature and body, excluding doc comments and attributes.
215    fn extract_source_code(&self, func: &ItemFn) -> String {
216        // Use the function signature span to get the full function including parameters
217        let sig_span = func.sig.span();
218        let body_span = func.block.brace_token.span.join();
219
220        let start = sig_span.start();
221        let end = body_span.end();
222
223        // Get lines of source code
224        let lines: Vec<&str> = self.source_code.lines().collect();
225
226        if start.line == 0 || end.line == 0 || start.line > lines.len() {
227            // Fallback: use quote to regenerate the function
228            return quote::quote!(#func).to_string();
229        }
230
231        // Extract the function source from signature to end of body (1-indexed lines)
232        let func_lines: Vec<&str> = lines
233            .iter()
234            .skip(start.line - 1)
235            .take(end.line - start.line + 1)
236            .copied()
237            .collect();
238
239        func_lines.join("\n")
240    }
241
242    /// Extract module-level doc comments (`//!`) as markdown cells.
243    /// Splits into separate cells when there are blank lines between comment blocks.
244    fn extract_module_docs(&mut self, file: &File) {
245        let mut current_block: Vec<(String, usize)> = Vec::new(); // (content, line_num)
246        let mut first_line = 0;
247        let mut last_line_in_block = 0;
248        let mut prev_line = 0;
249
250        for attr in &file.attrs {
251            // Look for #![doc = "..."] attributes (inner attributes)
252            if attr.path().is_ident("doc")
253                && matches!(attr.style, syn::AttrStyle::Inner(_))
254                && let syn::Meta::NameValue(nv) = &attr.meta
255                && let syn::Expr::Lit(syn::ExprLit {
256                    lit: syn::Lit::Str(s),
257                    ..
258                }) = &nv.value
259            {
260                let line_num = attr.span().start().line;
261
262                // If there's a gap (blank line) between this and previous doc comment, start a new block
263                if !current_block.is_empty() && line_num > prev_line + 1 {
264                    // Finalize the current block as a markdown cell
265                    self.finalize_markdown_block(&current_block, first_line, last_line_in_block);
266                    current_block.clear();
267                    first_line = line_num;
268                }
269
270                if current_block.is_empty() {
271                    first_line = line_num;
272                }
273
274                current_block.push((s.value(), line_num));
275                last_line_in_block = line_num;
276                prev_line = line_num;
277            }
278        }
279
280        // Finalize any remaining block
281        if !current_block.is_empty() {
282            self.finalize_markdown_block(&current_block, first_line, last_line_in_block);
283        }
284    }
285
286    /// Finalize a markdown block and add it as a markdown cell.
287    fn finalize_markdown_block(&mut self, block: &[(String, usize)], first_line: usize, last_line: usize) {
288        // Join doc lines and trim leading space (Rust adds a space after //!)
289        let content = block
290            .iter()
291            .map(|(s, _)| s.strip_prefix(' ').unwrap_or(s))
292            .collect::<Vec<_>>()
293            .join("\n");
294
295        let span = SourceSpan {
296            start_line: first_line,
297            start_col: 0,
298            end_line: last_line,
299            end_col: 0,
300        };
301
302        self.markdown_cells.push(MarkdownCell {
303            id: CellId::new(0), // Will be reassigned by engine
304            content,
305            span,
306            source_file: self.source_file.clone(),
307            is_module_doc: true,
308        });
309    }
310
311    /// Extract standalone // comment blocks as markdown cells.
312    /// Only extracts top-level comments (not inside functions or other blocks).
313    /// Splits on blank lines to create separate cells.
314    fn extract_standalone_doc_comments(&mut self, source: &str) {
315        let lines: Vec<&str> = source.lines().collect();
316        let mut i = 0;
317        let mut brace_depth: i32 = 0; // Track {} nesting depth
318
319        while i < lines.len() {
320            let line = lines[i];
321            let trimmed = line.trim();
322
323            // Update brace depth (simplified - doesn't handle strings/comments perfectly,
324            // but good enough since we're only looking at structure)
325            for ch in line.chars() {
326                match ch {
327                    '{' => brace_depth += 1,
328                    '}' => brace_depth = brace_depth.saturating_sub(1),
329                    _ => {}
330                }
331            }
332
333            // Only extract // comments at top level (brace_depth == 0)
334            if brace_depth == 0
335                && trimmed.starts_with("//")
336                && !trimmed.starts_with("//!")
337                && !trimmed.starts_with("///")
338            {
339                let start_line = i + 1; // 1-indexed
340                let mut comment_lines = vec![];
341                let mut j = i;
342
343                // Collect consecutive // lines (stop at blank line or non-comment)
344                while j < lines.len() {
345                    let line_trimmed = lines[j].trim();
346
347                    // Check if still at top level and still a comment
348                    if brace_depth == 0
349                        && line_trimmed.starts_with("//")
350                        && !line_trimmed.starts_with("//!")
351                        && !line_trimmed.starts_with("///")
352                    {
353                        let content = line_trimmed.strip_prefix("//").unwrap_or("");
354                        let content = content.strip_prefix(' ').unwrap_or(content);
355                        comment_lines.push(content.to_string());
356                        j += 1;
357                    } else {
358                        // Stop at blank line, non-comment, or brace
359                        break;
360                    }
361                }
362
363                // Check if the next non-empty line is code (struct, enum, fn, etc.)
364                // If so, this comment is attached to that code and should not be a markdown cell
365                let mut is_attached = false;
366                #[allow(clippy::needless_range_loop)]
367                for k in j..lines.len() {
368                    let next_line = lines[k].trim();
369                    if next_line.is_empty() {
370                        continue; // Skip blank lines
371                    }
372                    // Check if this is a code item (struct, enum, type, fn, impl, use, etc.)
373                    if next_line.starts_with("pub ")
374                        || next_line.starts_with("struct ")
375                        || next_line.starts_with("enum ")
376                        || next_line.starts_with("type ")
377                        || next_line.starts_with("fn ")
378                        || next_line.starts_with("impl ")
379                        || next_line.starts_with("use ")
380                        || next_line.starts_with("mod ")
381                        || next_line.starts_with("#[")
382                    {
383                        is_attached = true;
384                    }
385                    break; // Only check the first non-empty line
386                }
387
388                // Only create markdown cell if we have content AND it's not attached to code
389                if !comment_lines.is_empty() && !is_attached {
390                    let content = comment_lines.join("\n");
391                    let end_line = j; // j is the line after the last //
392
393                    let span = SourceSpan {
394                        start_line,
395                        start_col: 0,
396                        end_line,
397                        end_col: 0,
398                    };
399
400                    self.markdown_cells.push(MarkdownCell {
401                        id: CellId::new(0),
402                        content,
403                        span,
404                        source_file: self.source_file.clone(),
405                        is_module_doc: false,
406                    });
407                }
408
409                i = j;
410            } else {
411                i += 1;
412            }
413        }
414    }
415
416    /// Check if an item has the #[venus::hide] attribute.
417    fn has_hide_attribute(attrs: &[Attribute]) -> bool {
418        attrs.iter().any(|attr| {
419            let path = attr.path();
420            let segments: Vec<_> = path.segments.iter().map(|s| s.ident.to_string()).collect();
421
422            // Match #[venus::hide] or #[hide] (if imported)
423            (segments.len() == 2 && segments[0] == "venus" && segments[1] == "hide")
424                || (segments.len() == 1 && segments[0] == "hide")
425        })
426    }
427
428    /// Extract definition blocks (imports, types, helper functions).
429    ///
430    /// Definitions are split when markdown cells or executable cells appear between them.
431    /// Blank lines within definitions do NOT split the block.
432    /// Skips items with #[venus::hide] attribute.
433    fn extract_definition_blocks(&mut self, file: &File) {
434        use syn::Item;
435
436        let mut current_block: Vec<String> = Vec::new();
437        let mut block_start_line: Option<usize> = None;
438        let mut block_end_line: usize = 0;
439
440        for item in &file.items {
441            // Check if this is a definition item (not executable cell or other)
442            let is_definition = matches!(
443                item,
444                Item::Use(_) | Item::Struct(_) | Item::Enum(_) | Item::Type(_) | Item::Fn(_) | Item::Impl(_)
445            );
446
447            // Skip items with #[venus::hide]
448            let has_hide = match item {
449                Item::Use(item_use) => Self::has_hide_attribute(&item_use.attrs),
450                Item::Struct(item_struct) => Self::has_hide_attribute(&item_struct.attrs),
451                Item::Enum(item_enum) => Self::has_hide_attribute(&item_enum.attrs),
452                Item::Type(item_type) => Self::has_hide_attribute(&item_type.attrs),
453                Item::Impl(item_impl) => Self::has_hide_attribute(&item_impl.attrs),
454                Item::Fn(item_fn) => {
455                    Self::has_hide_attribute(&item_fn.attrs)
456                        || Self::has_cell_attribute(&item_fn.attrs) // Also skip executable cells
457                }
458                _ => false,
459            };
460
461            if has_hide || !is_definition {
462                // This item breaks the definition block - flush any accumulated definitions
463                if !current_block.is_empty() {
464                    self.flush_definition_block(&mut current_block, block_start_line.unwrap(), block_end_line);
465                    block_start_line = None;
466                }
467                continue;
468            }
469
470            // Get the span and extract original source text (preserves formatting)
471            let span = match item {
472                Item::Use(u) => self.span_to_source_span(u.span()),
473                Item::Struct(s) => self.span_to_source_span(s.span()),
474                Item::Enum(e) => self.span_to_source_span(e.span()),
475                Item::Type(t) => self.span_to_source_span(t.span()),
476                Item::Impl(i) => self.span_to_source_span(i.span()),
477                Item::Fn(f) => self.span_to_source_span(f.span()),
478                _ => unreachable!(),
479            };
480
481            // Extract the original source text with proper formatting
482            let source_text = self.extract_source_text(span.start_line, span.end_line);
483
484            // Check if there are any markdown or code cells between the last definition and this one
485            // If so, we need to split the definition block here
486            let should_split = if let Some(prev_end) = block_end_line.checked_sub(0).filter(|_| !current_block.is_empty()) {
487                // Check if any markdown cells fall between prev_end and span.start_line
488                let has_markdown_between = self.markdown_cells.iter().any(|md| {
489                    md.span.start_line > prev_end && md.span.start_line < span.start_line
490                });
491
492                // Check if any code cells fall between prev_end and span.start_line
493                let has_code_between = self.cells.iter().any(|cell| {
494                    cell.span.start_line > prev_end && cell.span.start_line < span.start_line
495                });
496
497                has_markdown_between || has_code_between
498            } else {
499                false
500            };
501
502            if should_split {
503                // Flush current block before starting a new one
504                self.flush_definition_block(&mut current_block, block_start_line.unwrap(), block_end_line);
505                current_block.clear();
506                block_start_line = Some(span.start_line);
507            } else if block_start_line.is_none() {
508                block_start_line = Some(span.start_line);
509            }
510
511            block_end_line = span.end_line;
512            current_block.push(source_text);
513        }
514
515        // Flush any remaining block
516        if !current_block.is_empty() {
517            self.flush_definition_block(&mut current_block, block_start_line.unwrap(), block_end_line);
518        }
519    }
520
521    /// Extract source text between line numbers (preserves original formatting).
522    fn extract_source_text(&self, start_line: usize, end_line: usize) -> String {
523        let lines: Vec<&str> = self.source_code.lines().collect();
524
525        // Line numbers are 1-based, convert to 0-based indices
526        let start_idx = start_line.saturating_sub(1);
527        let end_idx = end_line; // end_line is inclusive in 1-based, so this works for 0-based slice
528
529        if start_idx >= lines.len() {
530            return String::new();
531        }
532
533        let end_idx = end_idx.min(lines.len());
534        lines[start_idx..end_idx].join("\n")
535    }
536
537    /// Flush accumulated definition block into a single DefinitionCell.
538    fn flush_definition_block(&mut self, block: &mut Vec<String>, start_line: usize, end_line: usize) {
539        let combined_content = block.join("\n\n");
540
541        // Determine definition type based on content
542        let definition_type = self.infer_definition_type(&combined_content);
543
544        let definition_cell = DefinitionCell {
545            id: CellId::new(0), // Assigned later by GraphEngine
546            content: combined_content,
547            definition_type,
548            span: SourceSpan {
549                start_line,
550                start_col: 0,
551                end_line,
552                end_col: 0,
553            },
554            source_file: self.source_file.clone(),
555            doc_comment: None, // Combined blocks don't have individual doc comments
556        };
557
558        self.definition_cells.push(definition_cell);
559        block.clear();
560    }
561
562    /// Infer the definition type from the content.
563    /// Returns the specific type if the block contains only one type of definition,
564    /// otherwise returns Mixed.
565    fn infer_definition_type(&self, content: &str) -> super::types::DefinitionType {
566        use super::types::DefinitionType;
567
568        // Strip out doc comments to avoid false positives
569        let content_no_docs: String = content
570            .lines()
571            .filter(|line| {
572                let trimmed = line.trim();
573                !trimmed.starts_with("///") && !trimmed.starts_with("//!")
574            })
575            .collect::<Vec<_>>()
576            .join("\n");
577
578        // Look for definition keywords at word boundaries (not inside strings/comments)
579        let has_use = content_no_docs.lines().any(|line| {
580            let trimmed = line.trim();
581            trimmed.starts_with("use ") || trimmed.starts_with("pub use ")
582        });
583
584        let has_struct = content_no_docs.lines().any(|line| {
585            let trimmed = line.trim();
586            trimmed.starts_with("struct ") || trimmed.starts_with("pub struct ")
587        });
588
589        let has_enum = content_no_docs.lines().any(|line| {
590            let trimmed = line.trim();
591            trimmed.starts_with("enum ") || trimmed.starts_with("pub enum ")
592        });
593
594        let has_type = content_no_docs.lines().any(|line| {
595            let trimmed = line.trim();
596            trimmed.starts_with("type ") || trimmed.starts_with("pub type ")
597        });
598
599        let has_impl = content_no_docs.lines().any(|line| {
600            let trimmed = line.trim();
601            trimmed.starts_with("impl ") || trimmed.starts_with("impl<")
602        });
603
604        let has_fn = content_no_docs.lines().any(|line| {
605            let trimmed = line.trim();
606            (trimmed.starts_with("fn ") || trimmed.starts_with("pub fn "))
607                && !content.contains("#[venus::cell]")
608        });
609
610        // Special case: impl blocks contain functions, so if we have impl + fn, that's still just Impl
611        if has_impl && !has_use && !has_struct && !has_enum && !has_type {
612            return DefinitionType::Impl;
613        }
614
615        // Count how many different top-level types we have (excluding fn if impl is present)
616        let type_count = [has_use, has_struct, has_enum, has_type, has_impl, has_fn && !has_impl]
617            .iter()
618            .filter(|&&x| x)
619            .count();
620
621        // If only one type, return that specific type
622        if type_count == 1 {
623            if has_use {
624                return DefinitionType::Import;
625            }
626            if has_struct {
627                return DefinitionType::Struct;
628            }
629            if has_enum {
630                return DefinitionType::Enum;
631            }
632            if has_type {
633                return DefinitionType::TypeAlias;
634            }
635            if has_impl {
636                return DefinitionType::Impl;
637            }
638            if has_fn {
639                return DefinitionType::HelperFunction;
640            }
641        }
642
643        // Multiple types or couldn't determine - use Mixed
644        DefinitionType::Mixed
645    }
646}
647
648impl Default for CellParser {
649    fn default() -> Self {
650        Self::new()
651    }
652}
653
654impl<'ast> Visit<'ast> for CellParser {
655    fn visit_item_fn(&mut self, func: &'ast ItemFn) {
656        // Check if this function has #[venus::cell]
657        if !Self::has_cell_attribute(&func.attrs) {
658            return;
659        }
660
661        // Extract cell information
662        let name = func.sig.ident.to_string();
663
664        let dependencies: Vec<Dependency> = func
665            .sig
666            .inputs
667            .iter()
668            .filter_map(Self::extract_dependency)
669            .collect();
670
671        let return_type = Self::extract_return_type(&func.sig.output);
672
673        let doc_comment = Self::extract_doc_comment(&func.attrs);
674
675        let display_name = Self::extract_display_name(&doc_comment, &name);
676
677        let span = self.span_to_source_span(func.sig.ident.span());
678
679        let source_code = self.extract_source_code(func);
680
681        let cell = CellInfo {
682            id: CellId::new(0), // Assigned later by GraphEngine
683            name,
684            display_name,
685            dependencies,
686            return_type,
687            doc_comment,
688            source_code,
689            span,
690            source_file: self.source_file.clone(),
691        };
692
693        self.cells.push(cell);
694    }
695}
696
697#[cfg(test)]
698mod tests {
699    use super::*;
700    use std::path::PathBuf;
701
702    fn parse(source: &str) -> ParseResult {
703        let mut parser = CellParser::new();
704        parser.parse_str(source, &PathBuf::from("test.rs")).unwrap()
705    }
706
707    #[test]
708    fn test_parse_simple_cell() {
709        let source = r#"
710            use venus::prelude::*;
711
712            #[venus::cell]
713            pub fn config() -> Config {
714                Config::default()
715            }
716        "#;
717
718        let result = parse(source);
719        assert_eq!(result.code_cells.len(), 1);
720        assert_eq!(result.code_cells[0].name, "config");
721        assert!(result.code_cells[0].dependencies.is_empty());
722        assert_eq!(result.code_cells[0].return_type, "Config");
723    }
724
725    #[test]
726    fn test_parse_cell_with_dependencies() {
727        let source = r#"
728            #[venus::cell]
729            pub fn process(config: &Config, data: &DataFrame) -> Result {
730                Ok(())
731            }
732        "#;
733
734        let result = parse(source);
735        assert_eq!(result.code_cells.len(), 1);
736        assert_eq!(result.code_cells[0].name, "process");
737        assert_eq!(result.code_cells[0].dependencies.len(), 2);
738
739        assert_eq!(result.code_cells[0].dependencies[0].param_name, "config");
740        assert_eq!(result.code_cells[0].dependencies[0].param_type, "Config");
741        assert!(result.code_cells[0].dependencies[0].is_ref);
742
743        assert_eq!(result.code_cells[0].dependencies[1].param_name, "data");
744        assert_eq!(result.code_cells[0].dependencies[1].param_type, "DataFrame");
745    }
746
747    #[test]
748    fn test_parse_doc_comments() {
749        let source = r#"
750            /// This is a cell
751            /// with multiple lines
752            /// of documentation.
753            #[venus::cell]
754            pub fn documented() -> i32 {
755                42
756            }
757        "#;
758
759        let result = parse(source);
760        assert_eq!(result.code_cells.len(), 1);
761        assert!(result.code_cells[0].doc_comment.is_some());
762        let doc = result.code_cells[0].doc_comment.as_ref().unwrap();
763        assert!(doc.contains("This is a cell"));
764        assert!(doc.contains("multiple lines"));
765    }
766
767    #[test]
768    fn test_parse_multiple_cells() {
769        let source = r#"
770            #[venus::cell]
771            pub fn a() -> i32 { 1 }
772
773            fn not_a_cell() {}
774
775            #[venus::cell]
776            pub fn b(a: &i32) -> i32 { *a + 1 }
777
778            #[venus::cell]
779            pub fn c(b: &i32) -> i32 { *b + 1 }
780        "#;
781
782        let result = parse(source);
783        assert_eq!(result.code_cells.len(), 3);
784        assert_eq!(result.code_cells[0].name, "a");
785        assert_eq!(result.code_cells[1].name, "b");
786        assert_eq!(result.code_cells[2].name, "c");
787    }
788
789    #[test]
790    fn test_skip_non_cell_functions() {
791        let source = r#"
792            fn regular_function() {}
793
794            pub fn another_regular() -> i32 { 0 }
795
796            #[some_other_attr]
797            fn with_other_attr() {}
798
799            #[venus::cell]
800            pub fn actual_cell() -> i32 { 42 }
801        "#;
802
803        let result = parse(source);
804        assert_eq!(result.code_cells.len(), 1);
805        assert_eq!(result.code_cells[0].name, "actual_cell");
806    }
807
808    #[test]
809    fn test_mutable_reference() {
810        let source = r#"
811            #[venus::cell]
812            pub fn mutator(data: &mut Vec<i32>) -> () {
813                data.push(1);
814            }
815        "#;
816
817        let result = parse(source);
818        assert_eq!(result.code_cells.len(), 1);
819        assert_eq!(result.code_cells[0].dependencies.len(), 1);
820        assert!(result.code_cells[0].dependencies[0].is_ref);
821        assert!(result.code_cells[0].dependencies[0].is_mut);
822    }
823
824    #[test]
825    fn test_skip_ctx_parameter() {
826        let source = r#"
827            #[venus::cell]
828            pub fn with_context(ctx: &mut CellContext, data: &DataFrame) -> Result {
829                Ok(())
830            }
831        "#;
832
833        let result = parse(source);
834        assert_eq!(result.code_cells.len(), 1);
835        // ctx should be skipped
836        assert_eq!(result.code_cells[0].dependencies.len(), 1);
837        assert_eq!(result.code_cells[0].dependencies[0].param_name, "data");
838    }
839
840    #[test]
841    fn test_cell_attribute_shorthand() {
842        let source = r#"
843            use venus::cell;
844
845            #[cell]
846            pub fn shorthand() -> i32 { 42 }
847        "#;
848
849        let result = parse(source);
850        assert_eq!(result.code_cells.len(), 1);
851        assert_eq!(result.code_cells[0].name, "shorthand");
852    }
853
854    #[test]
855    fn test_generic_return_type() {
856        let source = r#"
857            #[venus::cell]
858            pub fn generic_cell() -> Result<DataFrame, Error> {
859                Ok(DataFrame::new())
860            }
861        "#;
862
863        let result = parse(source);
864        assert_eq!(result.code_cells.len(), 1);
865        assert!(result.code_cells[0].return_type.contains("Result"));
866        assert!(result.code_cells[0].return_type.contains("DataFrame"));
867    }
868
869    #[test]
870    fn test_markdown_cell_splitting() {
871        let source = r#"
872#[venus::cell]
873pub fn config() -> i32 {
874    42
875}
876
877// # First Markdown Cell
878//
879// Edit this content...
880
881
882// # Second Markdown Cell
883//
884// Edit this content...
885"#;
886
887        let result = parse(source);
888
889        println!("\n=== Parse Result ===");
890        println!("Code cells: {}", result.code_cells.len());
891        println!("Markdown cells: {}", result.markdown_cells.len());
892        for (i, md) in result.markdown_cells.iter().enumerate() {
893            println!("\nMarkdown cell {}:", i);
894            println!("  Lines: {}-{}", md.span.start_line, md.span.end_line);
895            println!("  Content: {:?}", md.content);
896        }
897
898        assert_eq!(result.code_cells.len(), 1, "Should have 1 code cell");
899        assert_eq!(result.markdown_cells.len(), 2, "Should have 2 markdown cells");
900
901        // Check first markdown cell
902        assert_eq!(result.markdown_cells[0].span.start_line, 7);
903        assert!(result.markdown_cells[0].content.contains("First Markdown Cell"));
904
905        // Check second markdown cell
906        assert_eq!(result.markdown_cells[1].span.start_line, 12);
907        assert!(result.markdown_cells[1].content.contains("Second Markdown Cell"));
908    }
909
910    #[test]
911    fn test_simple_rs_file() {
912        // Parse the actual simple.rs file to see what we get
913        let path = std::env::current_dir()
914            .unwrap()
915            .join("../../examples/simple.rs");
916        if !path.exists() {
917            println!("Skipping test - simple.rs not found at {:?}", path);
918            return;
919        }
920        let source = std::fs::read_to_string(&path).unwrap();
921        let result = parse(&source);
922
923        println!("\n=== simple.rs Parse Result ===");
924        println!("Code cells: {}", result.code_cells.len());
925        println!("Markdown cells: {}", result.markdown_cells.len());
926        for (i, md) in result.markdown_cells.iter().enumerate() {
927            println!("\nMarkdown cell {}:", i);
928            println!("  Lines: {}-{}", md.span.start_line, md.span.end_line);
929            println!("  Content length: {}", md.content.len());
930            println!("  Content preview: {:?}", &md.content.chars().take(100).collect::<String>());
931        }
932    }
933
934    #[test]
935    fn test_data_analysis_rs_file() {
936        // Parse data-analysis.rs to verify definition cells are properly split
937        let path = std::env::current_dir()
938            .unwrap()
939            .join("../../examples/data-analysis.rs");
940        if !path.exists() {
941            println!("Skipping test - data-analysis.rs not found at {:?}", path);
942            return;
943        }
944        let source = std::fs::read_to_string(&path).unwrap();
945        let result = parse(&source);
946
947        println!("\n=== data-analysis.rs Parse Result ===");
948        println!("Code cells: {}", result.code_cells.len());
949        println!("Markdown cells: {}", result.markdown_cells.len());
950        println!("Definition cells: {}", result.definition_cells.len());
951
952        println!("\n=== Code Cells ===");
953        for (i, cell) in result.code_cells.iter().enumerate() {
954            println!("Cell {}: {} (line {})", i, cell.name, cell.span.start_line);
955        }
956
957        println!("\n=== Markdown Cells ===");
958        for (i, md) in result.markdown_cells.iter().enumerate() {
959            println!("Markdown {}: lines {}-{}", i, md.span.start_line, md.span.end_line);
960            let preview: String = md.content.lines().take(2).collect::<Vec<_>>().join(" / ");
961            println!("  Content: {:?}", preview);
962        }
963
964        println!("\n=== Definition Cells ===");
965        for (i, def) in result.definition_cells.iter().enumerate() {
966            println!("Definition {}: lines {}-{} (type: {:?})", i, def.span.start_line, def.span.end_line, def.definition_type);
967            let preview: String = def.content.lines().take(2).collect::<Vec<_>>().join(" / ");
968            println!("  Content: {:?}", preview);
969        }
970
971        // Expected structure:
972        // Markdown cell: lines 1-11 (module doc)
973        // Definition cell: lines 15-16 (use statements) - Import type
974        // Markdown cell: lines 18 (Data Structures separator)
975        // Definition cell: lines 20-67 (struct definitions) - Struct type
976        // Markdown cell: lines 69 (Cells separator)
977        // Code cells: 75+
978        // Note: impl blocks have #[venus::hide] so they won't appear as definition cells
979
980        // We should have 2 definition cells (imports and structs, impl blocks are hidden)
981        assert_eq!(result.definition_cells.len(), 2, "Expected 2 definition cells, got {}", result.definition_cells.len());
982
983        // First definition cell should be imports (use statements)
984        use crate::graph::DefinitionType;
985        assert_eq!(result.definition_cells[0].definition_type, DefinitionType::Import, "First definition should be Import type");
986
987        // Second definition cell should be structs
988        assert_eq!(result.definition_cells[1].definition_type, DefinitionType::Struct, "Second definition should be Struct type");
989
990        // Check we have the expected code cells
991        assert!(result.code_cells.len() >= 7, "Expected at least 7 code cells, got {}", result.code_cells.len());
992
993        // Verify the specific cells that were reported as broken exist
994        assert!(result.code_cells.iter().any(|c| c.name == "category_analysis"), "category_analysis cell should exist");
995        assert!(result.code_cells.iter().any(|c| c.name == "report"), "report cell should exist");
996    }
997}